Don't always declare frag shader INPUT[0] as fragment position.

author Brian <brian.paul@tungstengraphics.com>
Fri, 14 Dec 2007 18:00:46 +0000 (11:00 -0700)
committer Brian <brian.paul@tungstengraphics.com>
Fri, 14 Dec 2007 18:00:46 +0000 (11:00 -0700)
diff --git a/src/mesa/pipe/p_state.h b/src/mesa/pipe/p_state.h

index 4e42838..43b710f 100644 (file)
--- a/src/mesa/pipe/p_state.h
+++ b/src/mesa/pipe/p_state.h
@@ -139,6 +139,7 @@ struct pipe_shader_state {
     const struct tgsi_token *tokens;
     ubyte num_inputs;
     ubyte num_outputs;
+   ubyte input_map[PIPE_MAX_SHADER_INPUTS]; /* XXX this may be temporary */
     ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
     ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
     ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
diff --git a/src/mesa/pipe/softpipe/sp_context.h b/src/mesa/pipe/softpipe/sp_context.h

index 2c038de..8fd4493 100644 (file)
--- a/src/mesa/pipe/softpipe/sp_context.h
+++ b/src/mesa/pipe/softpipe/sp_context.h
@@ -110,8 +110,6 @@ struct softpipe_context {
     struct vertex_info vertex_info;
     unsigned attr_mask;
     unsigned nr_frag_attrs;  /**< number of active fragment attribs */
-   boolean need_z;  /**< produce quad/fragment Z values? */
-   boolean need_w;  /**< produce quad/fragment W values? */
     int psize_slot;
  
  #if 0
diff --git a/src/mesa/pipe/softpipe/sp_headers.h b/src/mesa/pipe/softpipe/sp_headers.h

index b9f2b22..0ae31d8 100644 (file)
--- a/src/mesa/pipe/softpipe/sp_headers.h
+++ b/src/mesa/pipe/softpipe/sp_headers.h
@@ -73,6 +73,7 @@ struct quad_header {
     float coverage[QUAD_SIZE];    /** fragment coverage for antialiasing */
  
     const struct tgsi_interp_coef *coef;
+   const struct tgsi_interp_coef *posCoef;
  
     unsigned nr_attrs;
  };
diff --git a/src/mesa/pipe/softpipe/sp_prim_setup.c b/src/mesa/pipe/softpipe/sp_prim_setup.c

index fc96f92..8d8dcea 100644 (file)
--- a/src/mesa/pipe/softpipe/sp_prim_setup.c
+++ b/src/mesa/pipe/softpipe/sp_prim_setup.c
@@ -36,10 +36,12 @@
  #include "sp_context.h"
  #include "sp_headers.h"
  #include "sp_quad.h"
+#include "sp_state.h"
  #include "sp_prim_setup.h"
  #include "pipe/draw/draw_private.h"
  #include "pipe/draw/draw_vertex.h"
  #include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
  
  #define DEBUG_VERTS 0
  
@@ -80,8 +82,11 @@ struct setup_stage {
     float oneoverarea;
  
     struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
+   struct tgsi_interp_coef posCoef;  /* For Z, W */
     struct quad_header quad; 
  
+   uint firstFpInput;  /** Semantic type of first frag input */
+
     struct {
        int left[2];   /**< [0] = row0, [1] = row1 */
        int right[2];
@@ -365,18 +370,17 @@ static boolean setup_sort_vertices( struct setup_stage *setup,
   * \param i  which component of the slot (0..3)
   */
  static void const_coeff( struct setup_stage *setup,
-                        unsigned slot,
-                        unsigned i )
+                         struct tgsi_interp_coef *coef,
+                         uint vertSlot, uint i)
  {
-   assert(slot < PIPE_MAX_SHADER_INPUTS);
     assert(i <= 3);
  
-   setup->coef[slot].dadx[i] = 0;
-   setup->coef[slot].dady[i] = 0;
+   coef->dadx[i] = 0;
+   coef->dady[i] = 0;
  
     /* need provoking vertex info!
      */
-   setup->coef[slot].a0[i] = setup->vprovoke->data[slot][i];
+   coef->a0[i] = setup->vprovoke->data[vertSlot][i];
  }
  
  
@@ -385,19 +389,20 @@ static void const_coeff( struct setup_stage *setup,
   * for a triangle.
   */
  static void tri_linear_coeff( struct setup_stage *setup,
-                              unsigned slot,
-                              unsigned i)
+                              struct tgsi_interp_coef *coef,
+                              uint vertSlot, uint i)
  {
-   float botda = setup->vmid->data[slot][i] - setup->vmin->data[slot][i];
-   float majda = setup->vmax->data[slot][i] - setup->vmin->data[slot][i];
+   float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
+   float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
     float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
     float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
-   
-   assert(slot < PIPE_MAX_SHADER_INPUTS);
+   float dadx = a * setup->oneoverarea;
+   float dady = b * setup->oneoverarea;
+
     assert(i <= 3);
  
-   setup->coef[slot].dadx[i] = a * setup->oneoverarea;
-   setup->coef[slot].dady[i] = b * setup->oneoverarea;
+   coef->dadx[i] = dadx;
+   coef->dady[i] = dady;
  
     /* calculate a0 as the value which would be sampled for the
      * fragment at (0,0), taking into account that we want to sample at
@@ -411,9 +416,9 @@ static void tri_linear_coeff( struct setup_stage *setup,
      * to define a0 as the sample at a pixel center somewhere near vmin
      * instead - i'll switch to this later.
      */
-   setup->coef[slot].a0[i] = (setup->vmin->data[slot][i] - 
-                           (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) + 
-                            setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f)));
+   coef->a0[i] = (setup->vmin->data[vertSlot][i] - 
+                  (dadx * (setup->vmin->data[0][0] - 0.5f) + 
+                   dady * (setup->vmin->data[0][1] - 0.5f)));
  
     /*
     _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -434,39 +439,68 @@ static void tri_linear_coeff( struct setup_stage *setup,
   * divide the interpolated value by the interpolated W at that fragment.
   */
  static void tri_persp_coeff( struct setup_stage *setup,
-                             unsigned slot,
-                             unsigned i )
+                             struct tgsi_interp_coef *coef,
+                             uint vertSlot, uint i)
  {
-   /* premultiply by 1/w:
+   /* premultiply by 1/w  (v->data[0][3] is always W):
      */
-   float mina = setup->vmin->data[slot][i] * setup->vmin->data[0][3];
-   float mida = setup->vmid->data[slot][i] * setup->vmid->data[0][3];
-   float maxa = setup->vmax->data[slot][i] * setup->vmax->data[0][3];
-
+   float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3];
+   float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3];
+   float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3];
     float botda = mida - mina;
     float majda = maxa - mina;
     float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
     float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
+   float dadx = a * setup->oneoverarea;
+   float dady = b * setup->oneoverarea;
        
     /*
-   printf("tri persp %d,%d: %f %f %f\n", slot, i,
-          setup->vmin->data[slot][i],
-          setup->vmid->data[slot][i],
-          setup->vmax->data[slot][i]
+   printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
+          setup->vmin->data[vertSlot][i],
+          setup->vmid->data[vertSlot][i],
+          setup->vmax->data[vertSlot][i]
            );
     */
-
-   assert(slot < PIPE_MAX_SHADER_INPUTS);
     assert(i <= 3);
  
-   setup->coef[slot].dadx[i] = a * setup->oneoverarea;
-   setup->coef[slot].dady[i] = b * setup->oneoverarea;
-   setup->coef[slot].a0[i] = (mina - 
-                           (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) + 
-                            setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f)));
+   coef->dadx[i] = dadx;
+   coef->dady[i] = dady;
+   coef->a0[i] = (mina - 
+                  (dadx * (setup->vmin->data[0][0] - 0.5f) + 
+                   dady * (setup->vmin->data[0][1] - 0.5f)));
+}
+
+
+/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial, though Y has to be inverted for OpenGL.
+ * Z and W are copied from posCoef which should have already been computed.
+ * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
+ */
+static void
+setup_fragcoord_coeff(struct setup_stage *setup)
+{
+   const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height;
+   /*X*/
+   setup->coef[0].a0[0] = 0;
+   setup->coef[0].dadx[0] = 1.0;
+   setup->coef[0].dady[0] = 0.0;
+   /*Y*/
+   setup->coef[0].a0[1] = winHeight - 1;
+   setup->coef[0].dadx[1] = 0.0;
+   setup->coef[0].dady[1] = -1.0;
+   /*Z*/
+   setup->coef[0].a0[2] = setup->posCoef.a0[2];
+   setup->coef[0].dadx[2] = setup->posCoef.dadx[2];
+   setup->coef[0].dady[2] = setup->posCoef.dady[2];
+   /*w*/
+   setup->coef[0].a0[3] = setup->posCoef.a0[3];
+   setup->coef[0].dadx[3] = setup->posCoef.dadx[3];
+   setup->coef[0].dady[3] = setup->posCoef.dady[3];
  }
  
  
+
  /**
   * Compute the setup->coef[] array dadx, dady, a0 values.
   * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
@@ -474,36 +508,67 @@ static void tri_persp_coeff( struct setup_stage *setup,
  static void setup_tri_coefficients( struct setup_stage *setup )
  {
     const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode;
-   unsigned slot, j;
+#define USE_INPUT_MAP 0
+#if USE_INPUT_MAP
+   const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
+#endif
+   uint fragSlot;
  
     /* z and w are done by linear interpolation:
      */
-   tri_linear_coeff(setup, 0, 2);
-   tri_linear_coeff(setup, 0, 3);
+   tri_linear_coeff(setup, &setup->posCoef, 0, 2);
+   tri_linear_coeff(setup, &setup->posCoef, 0, 3);
  
     /* setup interpolation for all the remaining attributes:
      */
-   for (slot = 1; slot < setup->quad.nr_attrs; slot++) {
-      switch (interp[slot]) {
-      case INTERP_CONSTANT:
-        for (j = 0; j < NUM_CHANNELS; j++)
-           const_coeff(setup, slot, j);
-        break;
-      
-      case INTERP_LINEAR:
-        for (j = 0; j < NUM_CHANNELS; j++)
-           tri_linear_coeff(setup, slot, j);
-        break;
-
-      case INTERP_PERSPECTIVE:
-        for (j = 0; j < NUM_CHANNELS; j++)
-           tri_persp_coeff(setup, slot, j);
-        break;
-
-      default:
-         /* invalid interp mode */
-         assert(0);
+   for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) {
+      /* which vertex output maps to this fragment input: */
+#if !USE_INPUT_MAP
+      uint vertSlot;
+      if (setup->firstFpInput == TGSI_SEMANTIC_POSITION) {
+         if (fragSlot == 0) {
+            setup_fragcoord_coeff(setup);
+            continue;
+         }
+         vertSlot = fragSlot;
+      }
+      else {
+         vertSlot = fragSlot + 1;
        }
+
+#else
+      uint vertSlot = fs->input_map[fragSlot];
+
+      if (vertSlot == 0) {
+         /* special case: shader is reading gl_FragCoord */
+         /* XXX with a new INTERP_POSITION token, we could just add a
+          * new case to the switch below.
+          */
+         setup_fragcoord_coeff(setup);
+      }
+      else {
+#endif
+         uint j;
+         switch (interp[vertSlot]) {
+         case INTERP_CONSTANT:
+            for (j = 0; j < NUM_CHANNELS; j++)
+               const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+            break;
+         case INTERP_LINEAR:
+            for (j = 0; j < NUM_CHANNELS; j++)
+               tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+            break;
+         case INTERP_PERSPECTIVE:
+            for (j = 0; j < NUM_CHANNELS; j++)
+               tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+            break;
+         default:
+            /* invalid interp mode */
+            assert(0);
+         }
+#if USE_INPUT_MAP
+      }
+#endif
     }
  }
  
@@ -660,17 +725,18 @@ static void setup_tri( struct draw_stage *stage,
   * for a line.
   */
  static void
-line_linear_coeff(struct setup_stage *setup, unsigned slot, unsigned i)
+line_linear_coeff(struct setup_stage *setup,
+                  struct tgsi_interp_coef *coef,
+                  uint vertSlot, uint i)
  {
-   const float da = setup->vmax->data[slot][i] - setup->vmin->data[slot][i];
+   const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
     const float dadx = da * setup->emaj.dx * setup->oneoverarea;
     const float dady = da * setup->emaj.dy * setup->oneoverarea;
-   setup->coef[slot].dadx[i] = dadx;
-   setup->coef[slot].dady[i] = dady;
-   setup->coef[slot].a0[i]
-      = (setup->vmin->data[slot][i] - 
-         (dadx * (setup->vmin->data[0][0] - 0.5f) + 
-          dady * (setup->vmin->data[0][1] - 0.5f)));
+   coef->dadx[i] = dadx;
+   coef->dady[i] = dady;
+   coef->a0[i] = (setup->vmin->data[vertSlot][i] - 
+                  (dadx * (setup->vmin->data[0][0] - 0.5f) + 
+                   dady * (setup->vmin->data[0][1] - 0.5f)));
  }
  
  
@@ -679,21 +745,21 @@ line_linear_coeff(struct setup_stage *setup, unsigned slot, unsigned i)
   * for a line.
   */
  static void
-line_persp_coeff(struct setup_stage *setup, unsigned slot, unsigned i)
+line_persp_coeff(struct setup_stage *setup,
+                  struct tgsi_interp_coef *coef,
+                  uint vertSlot, uint i)
  {
     /* XXX double-check/verify this arithmetic */
-   const float a0 = setup->vmin->data[slot][i] * setup->vmin->data[0][3];
-   const float a1 = setup->vmax->data[slot][i] * setup->vmin->data[0][3];
+   const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3];
+   const float a1 = setup->vmax->data[vertSlot][i] * setup->vmin->data[0][3];
     const float da = a1 - a0;
     const float dadx = da * setup->emaj.dx * setup->oneoverarea;
     const float dady = da * setup->emaj.dy * setup->oneoverarea;
-   setup->coef[slot].dadx[i] = dadx;
-   setup->coef[slot].dady[i] = dady;
-   setup->coef[slot].a0[i]
-      = (setup->vmin->data[slot][i] - 
-         (dadx * (setup->vmin->data[0][0] - 0.5f) + 
-          dady * (setup->vmin->data[0][1] - 0.5f)));
-
+   coef->dadx[i] = dadx;
+   coef->dady[i] = dady;
+   coef->a0[i] = (setup->vmin->data[vertSlot][i] - 
+                  (dadx * (setup->vmin->data[0][0] - 0.5f) + 
+                   dady * (setup->vmin->data[0][1] - 0.5f)));
  }
  
  
@@ -705,7 +771,8 @@ static INLINE void
  setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim)
  {
     const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode;
-   unsigned slot, j;
+   const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
+   unsigned fragSlot;
  
     /* use setup->vmin, vmax to point to vertices */
     setup->vprovoke = prim->v[1];
@@ -720,31 +787,39 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim)
  
     /* z and w are done by linear interpolation:
      */
-   line_linear_coeff(setup, 0, 2);
-   line_linear_coeff(setup, 0, 3);
+   line_linear_coeff(setup, &setup->posCoef, 0, 2);
+   line_linear_coeff(setup, &setup->posCoef, 0, 3);
  
     /* setup interpolation for all the remaining attributes:
      */
-   for (slot = 1; slot < setup->quad.nr_attrs; slot++) {
-      switch (interp[slot]) {
-      case INTERP_CONSTANT:
-        for (j = 0; j < NUM_CHANNELS; j++)
-           const_coeff(setup, slot, j);
-        break;
-      
-      case INTERP_LINEAR:
-        for (j = 0; j < NUM_CHANNELS; j++)
-           line_linear_coeff(setup, slot, j);
-        break;
-
-      case INTERP_PERSPECTIVE:
-        for (j = 0; j < NUM_CHANNELS; j++)
-           line_persp_coeff(setup, slot, j);
-        break;
-
-      default:
-         /* invalid interp mode */
-         assert(0);
+   for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) {
+      /* which vertex output maps to this fragment input: */
+      uint vertSlot = fs->input_map[fragSlot];
+
+      if (vertSlot == 0) {
+         /* special case: shader is reading gl_FragCoord */
+         setup_fragcoord_coeff(setup);
+      }
+      else {
+         uint j;
+         switch (interp[vertSlot]) {
+         case INTERP_CONSTANT:
+            for (j = 0; j < NUM_CHANNELS; j++)
+               const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+            break;
+         case INTERP_LINEAR:
+            for (j = 0; j < NUM_CHANNELS; j++)
+               line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+            break;
+         case INTERP_PERSPECTIVE:
+            for (j = 0; j < NUM_CHANNELS; j++)
+               line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+            break;
+            
+         default:
+            /* invalid interp mode */
+            assert(0);
+         }
        }
     }
  }
@@ -910,14 +985,15 @@ setup_line(struct draw_stage *stage, struct prim_header *prim)
  
  
  static void
-point_persp_coeff(struct setup_stage *setup, const struct vertex_header *vert,
-                  uint slot, uint i)
+point_persp_coeff(struct setup_stage *setup,
+                  const struct vertex_header *vert,
+                  struct tgsi_interp_coef *coef,
+                  uint vertSlot, uint i)
  {
-   assert(slot < PIPE_MAX_SHADER_INPUTS);
     assert(i <= 3);
-   setup->coef[slot].dadx[i] = 0.0F;
-   setup->coef[slot].dady[i] = 0.0F;
-   setup->coef[slot].a0[i] = vert->data[slot][i] * vert->data[0][3];
+   coef->dadx[i] = 0.0F;
+   coef->dady[i] = 0.0F;
+   coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3];
  }
  
  
@@ -930,6 +1006,7 @@ static void
  setup_point(struct draw_stage *stage, struct prim_header *prim)
  {
     struct setup_stage *setup = setup_stage( stage );
+   const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
     const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode;
     const struct vertex_header *v0 = prim->v[0];
     const int sizeAttr = setup->softpipe->psize_slot;
@@ -940,7 +1017,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim)
     const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
     const float x = v0->data[0][0];  /* Note: data[0] is always position */
     const float y = v0->data[0][1];
-   unsigned slot, j;
+   uint fragSlot;
  
     /* For points, all interpolants are constant-valued.
      * However, for point sprites, we'll need to setup texcoords appropriately.
@@ -959,22 +1036,36 @@ setup_point(struct draw_stage *stage, struct prim_header *prim)
      * probably should be ruled out on that basis.
      */
     setup->vprovoke = prim->v[0];
-   const_coeff(setup, 0, 2);
-   const_coeff(setup, 0, 3);
-   for (slot = 1; slot < setup->quad.nr_attrs; slot++) {
-      switch (interp[slot]) {
-      case INTERP_CONSTANT:
-         /* fall-through */
-      case INTERP_LINEAR:
-         for (j = 0; j < NUM_CHANNELS; j++)
-            const_coeff(setup, slot, j);
-         break;
-      case INTERP_PERSPECTIVE:
-         for (j = 0; j < NUM_CHANNELS; j++)
-            point_persp_coeff(setup, v0, slot, j);
-         break;
-      default:
-         assert(0);
+
+   /* setup Z, W */
+   const_coeff(setup, &setup->posCoef, 0, 2);
+   const_coeff(setup, &setup->posCoef, 0, 3);
+
+   for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) {
+      /* which vertex output maps to this fragment input: */
+      uint vertSlot = fs->input_map[fragSlot];
+
+      if (vertSlot == 0) {
+         /* special case: shader is reading gl_FragCoord */
+         setup_fragcoord_coeff(setup);
+      }
+      else {
+         uint j;
+         switch (interp[vertSlot]) {
+         case INTERP_CONSTANT:
+            /* fall-through */
+         case INTERP_LINEAR:
+            for (j = 0; j < NUM_CHANNELS; j++)
+               const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
+            break;
+         case INTERP_PERSPECTIVE:
+            for (j = 0; j < NUM_CHANNELS; j++)
+               point_persp_coeff(setup, setup->vprovoke,
+                                 &setup->coef[fragSlot], vertSlot, j);
+            break;
+         default:
+            assert(0);
+         }
        }
     }
  
@@ -1108,9 +1199,12 @@ static void setup_begin( struct draw_stage *stage )
  {
     struct setup_stage *setup = setup_stage(stage);
     struct softpipe_context *sp = setup->softpipe;
+   const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
  
     setup->quad.nr_attrs = setup->softpipe->nr_frag_attrs;
  
+   setup->firstFpInput = fs->input_semantic_name[0];
+
     sp->quad.first->begin(sp->quad.first);
  }
  
@@ -1151,6 +1245,7 @@ struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe )
     setup->stage.destroy = render_destroy;
  
     setup->quad.coef = setup->coef;
+   setup->quad.posCoef = &setup->posCoef;
  
     return &setup->stage;
  }
diff --git a/src/mesa/pipe/softpipe/sp_quad_earlyz.c b/src/mesa/pipe/softpipe/sp_quad_earlyz.c

index 3abd1f1..22ea990 100644 (file)
--- a/src/mesa/pipe/softpipe/sp_quad_earlyz.c
+++ b/src/mesa/pipe/softpipe/sp_quad_earlyz.c
@@ -47,9 +47,9 @@ earlyz_quad(
  {
     const float fx = (float) quad->x0;
     const float fy = (float) quad->y0;
-   const float dzdx = quad->coef[0].dadx[2];
-   const float dzdy = quad->coef[0].dady[2];
-   const float z0 = quad->coef[0].a0[2] + dzdx * fx + dzdy * fy;
+   const float dzdx = quad->posCoef->dadx[2];
+   const float dzdy = quad->posCoef->dady[2];
+   const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
  
     quad->outputs.depth[0] = z0;
     quad->outputs.depth[1] = z0 + dzdx;
diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c

index da590b2..6e7e7eb 100644 (file)
--- a/src/mesa/pipe/softpipe/sp_quad_fs.c
+++ b/src/mesa/pipe/softpipe/sp_quad_fs.c
@@ -74,15 +74,49 @@ quad_shade_stage(struct quad_stage *qs)
  }
  
  
+/**
+ * Compute quad X,Y,Z,W for the four fragments in a quad.
+ * Note that we only need to "compute" X and Y for the upper-left fragment.
+ * We could do less work if we're not depth testing, or there's no
+ * perspective-corrected attributes, but that's seldom.
+ */
+static void
+setup_pos_vector(const struct tgsi_interp_coef *coef,
+                 float x, float y,
+                 struct tgsi_exec_vector *quadpos)
+{
+   uint chan;
+   /* do X */
+   quadpos->xyzw[0].f[0] = x;
+   /* do Y */
+   quadpos->xyzw[1].f[0] = y;
+   /* do Z and W for all fragments in the quad */
+   for (chan = 2; chan < 4; chan++) {
+      const float dadx = coef->dadx[chan];
+      const float dady = coef->dady[chan];
+      const float a0 = coef->a0[chan] + dadx * x + dady * y;
+      quadpos->xyzw[chan].f[0] = a0;
+      quadpos->xyzw[chan].f[1] = a0 + dadx;
+      quadpos->xyzw[chan].f[2] = a0 + dady;
+      quadpos->xyzw[chan].f[3] = a0 + dadx + dady;
+   }
+}
+
+
  typedef void (XSTDCALL *codegen_function)(
     const struct tgsi_exec_vector *input,
     struct tgsi_exec_vector *output,
     float (*constant)[4],
     struct tgsi_exec_vector *temporary,
-   const struct tgsi_interp_coef *coef );
+   const struct tgsi_interp_coef *coef
+#if 0
+   ,const struct tgsi_exec_vector *quadPos
+#endif
+ );
+
  
-/* This should be done by the fragment shader execution unit (code
- * generated from the decl instructions).  Do it here for now.
+/**
+ * Execute fragment shader for the four fragments in the quad.
   */
  static void
  shade_quad(
@@ -91,33 +125,15 @@ shade_quad(
  {
     struct quad_shade_stage *qss = quad_shade_stage( qs );
     struct softpipe_context *softpipe = qs->softpipe;
-   const float fx = (float) quad->x0;
-   const float fy = (float) quad->y0;
     struct tgsi_exec_machine *machine = &qss->machine;
  
-   /* Consts does not require 16 byte alignment. */
+   /* Consts do not require 16 byte alignment. */
     machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
  
     machine->InterpCoefs = quad->coef;
  
-#if 1 /* XXX only do this if the fp really reads fragment.position */
-   machine->Inputs[0].xyzw[0].f[0] = fx;
-   machine->Inputs[0].xyzw[0].f[1] = fx + 1.0f;
-   machine->Inputs[0].xyzw[0].f[2] = fx;
-   machine->Inputs[0].xyzw[0].f[3] = fx + 1.0f;
-
-   /* XXX for OpenGL we need to invert the Y pos here (y=0=top).
-    * but that'll mess up linear/perspective interpolation of other
-    * attributes...
-    */
-   machine->Inputs[0].xyzw[1].f[0] = fy;
-   machine->Inputs[0].xyzw[1].f[1] = fy;
-   machine->Inputs[0].xyzw[1].f[2] = fy + 1.0f;
-   machine->Inputs[0].xyzw[1].f[3] = fy + 1.0f;
-#endif
-
-   machine->QuadX = quad->x0;
-   machine->QuadY = quad->y0;
+   /* Compute X, Y, Z, W vals for this quad */
+   setup_pos_vector(quad->posCoef, quad->x0, quad->y0, &machine->QuadPos);
  
     /* run shader */
  #if defined(__i386__) || defined(__386__)
@@ -130,9 +146,9 @@ shade_quad(
           machine->Temps,
           machine->InterpCoefs
  #if 0
-         ,quad->x0, quad->y0
+         ,machine->QuadPos
  #endif
- );
+           );
        quad->mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
     }
     else
diff --git a/src/mesa/pipe/softpipe/sp_state_derived.c b/src/mesa/pipe/softpipe/sp_state_derived.c

index c4f1a0a..736ac1c 100644 (file)
--- a/src/mesa/pipe/softpipe/sp_state_derived.c
+++ b/src/mesa/pipe/softpipe/sp_state_derived.c
@@ -51,18 +51,11 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe )
  
     memset(vinfo, 0, sizeof(*vinfo));
  
-   if (softpipe->depth_stencil->depth.enabled)
-      softpipe->need_z = TRUE;
-   else
-      softpipe->need_z = FALSE;
-   softpipe->need_w = FALSE;
  
     if (fs->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
        /* Need Z if depth test is enabled or the fragment program uses the
         * fragment position (XYZW).
         */
-      softpipe->need_z = TRUE;
-      softpipe->need_w = TRUE;
     }
  
     softpipe->psize_slot = -1;
@@ -121,7 +114,6 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe )
        case TGSI_SEMANTIC_GENERIC:
           /* this includes texcoords and varying vars */
           draw_emit_vertex_attr(vinfo, FORMAT_4F, INTERP_PERSPECTIVE);
-         softpipe->need_w = TRUE;
           break;
  
        default:
@@ -129,7 +121,11 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe )
        }
     }
  
+#if 00
     softpipe->nr_frag_attrs = vinfo->num_attribs;
+#else
+   softpipe->nr_frag_attrs = fs->num_inputs;
+#endif
  
     /* We want these after all other attribs since they won't get passed
      * to the fragment shader.  All prior vertex output attribs should match
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c

index 03997f9..1f43f36 100644 (file)
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
@@ -1352,7 +1352,8 @@ linear_interpolation(
     unsigned attrib,
     unsigned chan )
  {
-   const float x = mach->QuadX, y = mach->QuadY;
+   const float x = mach->QuadPos.xyzw[0].f[0];
+   const float y = mach->QuadPos.xyzw[1].f[0];
     const float dadx = mach->InterpCoefs[attrib].dadx[chan];
     const float dady = mach->InterpCoefs[attrib].dady[chan];
     const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
@@ -1368,14 +1369,17 @@ perspective_interpolation(
     unsigned attrib,
     unsigned chan )
  {
-   const float x = mach->QuadX, y = mach->QuadY;
+   const float x = mach->QuadPos.xyzw[0].f[0];
+   const float y = mach->QuadPos.xyzw[1].f[0];
     const float dadx = mach->InterpCoefs[attrib].dadx[chan];
     const float dady = mach->InterpCoefs[attrib].dady[chan];
     const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
-   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / mach->Inputs[0].xyzw[3].f[0];
-   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / mach->Inputs[0].xyzw[3].f[1];
-   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / mach->Inputs[0].xyzw[3].f[2];
-   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / mach->Inputs[0].xyzw[3].f[3];
+   const float *w = mach->QuadPos.xyzw[3].f;
+   /* divide by W here */
+   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
+   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
+   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
+   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
  }
  
  
@@ -1400,17 +1404,6 @@ exec_declaration(
           last = decl->u.DeclarationRange.Last;
           mask = decl->Declaration.UsageMask;
  
-         /* Do not touch WPOS.xy */
-         if( first == 0 ) {
-            mask &= ~TGSI_WRITEMASK_XY;
-            if( mask == TGSI_WRITEMASK_NONE ) {
-               first++;
-               if( first > last ) {
-                  return;
-               }
-            }
-         }
-
           switch( decl->Interpolation.Interpolate ) {
           case TGSI_INTERPOLATE_CONSTANT:
              interp = constant_interpolation;
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.h b/src/mesa/pipe/tgsi/exec/tgsi_exec.h

index 8d166bb..db92e28 100644 (file)
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.h
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.h
@@ -170,7 +170,6 @@ struct tgsi_exec_machine
     struct tgsi_exec_vector       *Inputs;
     struct tgsi_exec_vector       *Outputs;
     const struct tgsi_token       *Tokens;
-   float                         QuadX, QuadY; /**< for frag progs only */
     unsigned                      Processor;
  
     /* GEOMETRY processor only. */
@@ -178,6 +177,7 @@ struct tgsi_exec_machine
  
     /* FRAGMENT processor only. */
     const struct tgsi_interp_coef *InterpCoefs;
+   struct tgsi_exec_vector       QuadPos;
  
     /* Conditional execution masks */
     uint CondMask;  /**< For IF/ELSE/ENDIF */
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c

index 4ec10ba..33372b0 100644 (file)
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -151,8 +151,7 @@ find_translated_vp(struct st_context *st,
  {
     static const GLuint UNUSED = ~0;
     struct translated_vertex_program *xvp;
-   const GLbitfield fragInputsRead
-      = stfp->Base.Base.InputsRead | FRAG_BIT_WPOS;
+   const GLbitfield fragInputsRead = stfp->Base.Base.InputsRead;
  
     /*
      * Translate fragment program if needed.
@@ -206,6 +205,7 @@ find_translated_vp(struct st_context *st,
     if (xvp->serialNo != stvp->serialNo) {
        GLuint outAttr, dummySlot;
        const GLbitfield outputsWritten = stvp->Base.Base.OutputsWritten;
+      GLuint numVpOuts = 0;
  
        /* Compute mapping of vertex program outputs to slots, which depends
         * on the fragment program's input->slot mapping.
@@ -214,11 +214,24 @@ find_translated_vp(struct st_context *st,
           /* set default: */
           xvp->output_to_slot[outAttr] = UNUSED;
  
-         if (outputsWritten & (1 << outAttr)) {
+         if (outAttr == VERT_RESULT_HPOS) {
+            /* always put xformed position into slot zero */
+            xvp->output_to_slot[VERT_RESULT_HPOS] = 0;
+            numVpOuts++;
+         }
+         else if (outputsWritten & (1 << outAttr)) {
              /* see if the frag prog wants this vert output */
-            GLint fpIn = vp_out_to_fp_in(outAttr);
-            if (fpIn >= 0) {
-               xvp->output_to_slot[outAttr] = stfp->input_to_slot[fpIn];
+            GLint fpInAttrib = vp_out_to_fp_in(outAttr);
+            if (fpInAttrib >= 0) {
+               GLuint fpInSlot = stfp->input_to_slot[fpInAttrib];
+               GLuint vpOutSlot = stfp->fs->state.input_map[fpInSlot];
+               xvp->output_to_slot[outAttr] = vpOutSlot;
+               numVpOuts++;
+            }
+            else if (outAttr == VERT_RESULT_BFC0 ||
+                     outAttr == VERT_RESULT_BFC1) {
+               /* backface colors go into last slots */
+               xvp->output_to_slot[outAttr] = numVpOuts++;
              }
           }
        }
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c

index b392edf..27dab5b 100644 (file)
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -675,44 +675,22 @@ tgsi_translate_mesa_program(
     if (procType == TGSI_PROCESSOR_FRAGMENT) {
        for (i = 0; i < numInputs; i++) {
           struct tgsi_full_declaration fulldecl;
-         switch (inputSemanticName[i]) {
-         case TGSI_SEMANTIC_POSITION:
-            /* Fragment XY pos */
-            fulldecl = make_input_decl(i,
-                                       GL_TRUE, TGSI_INTERPOLATE_CONSTANT,
-                                       TGSI_WRITEMASK_XY,
-                                       GL_TRUE, TGSI_SEMANTIC_POSITION, 0 );
-            ti += tgsi_build_full_declaration(
-                                              &fulldecl,
-                                              &tokens[ti],
-                                              header,
-                                              maxTokens - ti );
-            /* Fragment ZW pos */
-            fulldecl = make_input_decl(i,
-                                       GL_TRUE, TGSI_INTERPOLATE_LINEAR,
-                                       TGSI_WRITEMASK_ZW,
-                                       GL_TRUE, TGSI_SEMANTIC_POSITION, 0 );
-            ti += tgsi_build_full_declaration(&fulldecl,
-                                              &tokens[ti],
-                                              header,
-                                              maxTokens - ti );
-            break;
-         default:
-            fulldecl = make_input_decl(i,
-                                       GL_TRUE, interpMode[i],
-                                       TGSI_WRITEMASK_XYZW,
-                                       GL_TRUE, inputSemanticName[i],
-                                       inputSemanticIndex[i]);
-            ti += tgsi_build_full_declaration(&fulldecl,
-                                              &tokens[ti],
-                                              header,
-                                              maxTokens - ti );
-            break;
-         }
+         fulldecl = make_input_decl(i,
+                                    GL_TRUE, interpMode[i],
+                                    TGSI_WRITEMASK_XYZW,
+                                    GL_TRUE, inputSemanticName[i],
+                                    inputSemanticIndex[i]);
+         ti += tgsi_build_full_declaration(&fulldecl,
+                                           &tokens[ti],
+                                           header,
+                                           maxTokens - ti );
        }
     }
     else {
        /* vertex prog */
+      /* XXX: this could probably be merged with the clause above.
+       * The only difference is the semantic tags.
+       */
        for (i = 0; i < numInputs; i++) {
           struct tgsi_full_declaration fulldecl;
           fulldecl = make_input_decl(i,
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c

index e64bf14..fe22233 100644 (file)
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -47,7 +47,7 @@
  #include "st_mesa_to_tgsi.h"
  
  
-#define TGSI_DEBUG 0
+#define TGSI_DEBUG 01
  
  
  /**
@@ -283,16 +283,17 @@ st_translate_fragment_program(struct st_context *st,
     const struct cso_fragment_shader *cso;
     GLuint interpMode[16];  /* XXX size? */
     GLuint attr;
-   GLbitfield inputsRead = stfp->Base.Base.InputsRead;
-
-   /* For software rendering, we always need the fragment input position
-    * in order to calculate interpolated values.
-    * For i915, we always want to emit the semantic info for position.
-    */
-   inputsRead |= FRAG_BIT_WPOS;
+   const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
+   GLuint vslot = 0;
  
     memset(&fs, 0, sizeof(fs));
  
+   /* which vertex output goes to the first fragment input: */
+   if (inputsRead & FRAG_BIT_WPOS)
+      vslot = 0;
+   else
+      vslot = 1;
+
     /*
      * Convert Mesa program inputs to TGSI input register semantics.
      */
@@ -300,15 +301,17 @@ st_translate_fragment_program(struct st_context *st,
        if (inputsRead & (1 << attr)) {
           const GLuint slot = fs.num_inputs;
  
-         fs.num_inputs++;
-
           defaultInputMapping[attr] = slot;
  
+         fs.input_map[slot] = vslot++;
+
+         fs.num_inputs++;
+
           switch (attr) {
           case FRAG_ATTRIB_WPOS:
              fs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
              fs.input_semantic_index[slot] = 0;
-            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
+            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
              break;
           case FRAG_ATTRIB_COL0:
              fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
author	Brian <brian.paul@tungstengraphics.com>
	Fri, 14 Dec 2007 18:00:46 +0000 (11:00 -0700)
committer	Brian <brian.paul@tungstengraphics.com>
	Fri, 14 Dec 2007 18:00:46 +0000 (11:00 -0700)
src/mesa/pipe/p_state.h		patch \| blob \| history
src/mesa/pipe/softpipe/sp_context.h		patch \| blob \| history
src/mesa/pipe/softpipe/sp_headers.h		patch \| blob \| history
src/mesa/pipe/softpipe/sp_prim_setup.c		patch \| blob \| history
src/mesa/pipe/softpipe/sp_quad_earlyz.c		patch \| blob \| history
src/mesa/pipe/softpipe/sp_quad_fs.c		patch \| blob \| history
src/mesa/pipe/softpipe/sp_state_derived.c		patch \| blob \| history
src/mesa/pipe/tgsi/exec/tgsi_exec.c		patch \| blob \| history
src/mesa/pipe/tgsi/exec/tgsi_exec.h		patch \| blob \| history
src/mesa/state_tracker/st_atom_shader.c		patch \| blob \| history
src/mesa/state_tracker/st_mesa_to_tgsi.c		patch \| blob \| history
src/mesa/state_tracker/st_program.c		patch \| blob \| history