llvmpipe: more wip on coefficients

author Keith Whitwell <keithw@vmware.com>
Fri, 9 Oct 2009 09:23:53 +0000 (10:23 +0100)
committer Keith Whitwell <keithw@vmware.com>
Fri, 9 Oct 2009 09:23:53 +0000 (10:23 +0100)
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index efd9112..382a52e 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -29,23 +29,23 @@
   * Binning code for triangles
   */
  
-#include "lp_setup.h"
-#include "lp_state.h"
+#include "lp_setup_context.h"
  #include "util/u_math.h"
  #include "util/u_memory.h"
  
+#define NUM_CHANNELS 4
  
  /**
   * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
   */
  static void constant_coef( struct lp_rast_triangle *tri,
-                          const float (*v3)[4],
-                          unsigned vert_attr,
-                          unsigned i )
+                           unsigned slot,
+                          const float value,
+                           unsigned i )
  {
-   tri->inputs.a0[i] = v3[vert_attr][i];
-   tri->inputs.dadx[i] = 0;
-   tri->inputs.dady[i] = 0;
+   tri->inputs.a0[slot][i] = value;
+   tri->inputs.dadx[slot][i] = 0;
+   tri->inputs.dady[slot][i] = 0;
  }
  
  /**
@@ -53,45 +53,40 @@ static void constant_coef( struct lp_rast_triangle *tri,
   * for a triangle.
   */
  static void linear_coef( struct lp_rast_triangle *tri,
-                         unsigned input,
-                        const float (*v1)[4],
-                        const float (*v2)[4],
-                        const float (*v3)[4],
-                        unsigned vert_attr)
+                         unsigned slot,
+                         const float (*v1)[4],
+                         const float (*v2)[4],
+                         const float (*v3)[4],
+                         unsigned vert_attr,
+                         unsigned i)
  {
-   unsigned i;
-
-   input *= 4;
-
-   for (i = 0; i < NUM_CHANNELS; i++) {
-      float a1 = v1[vert_attr][i];
-      float a2 = v2[vert_attr][i];
-      float a3 = v3[vert_attr][i];
-
-      float da12 = a1 - a2;
-      float da31 = a3 - a1;
-      float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
-      float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
-
-      tri->inputs.dadx[input+i] = dadx;
-      tri->inputs.dady[input+i] = dady;
-
-      /* calculate a0 as the value which would be sampled for the
-       * fragment at (0,0), taking into account that we want to sample at
-       * pixel centers, in other words (0.5, 0.5).
-       *
-       * this is neat but unfortunately not a good way to do things for
-       * triangles with very large values of dadx or dady as it will
-       * result in the subtraction and re-addition from a0 of a very
-       * large number, which means we'll end up loosing a lot of the
-       * fractional bits and precision from a0.  the way to fix this is
-       * to define a0 as the sample at a pixel center somewhere near vmin
-       * instead - i'll switch to this later.
-       */
-      tri->inputs.a0[input+i] = (v1[vert_attr][i] -
-                                 (dadx * (v1[0][0] - 0.5f) +
-                                  dady * (v1[0][1] - 0.5f)));
-   }
+   float a1 = v1[vert_attr][i];
+   float a2 = v2[vert_attr][i];
+   float a3 = v3[vert_attr][i];
+
+   float da12 = a1 - a2;
+   float da31 = a3 - a1;
+   float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
+   float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
+
+   tri->inputs.dadx[slot][i] = dadx;
+   tri->inputs.dady[slot][i] = dady;
+
+   /* calculate a0 as the value which would be sampled for the
+    * fragment at (0,0), taking into account that we want to sample at
+    * pixel centers, in other words (0.5, 0.5).
+    *
+    * this is neat but unfortunately not a good way to do things for
+    * triangles with very large values of dadx or dady as it will
+    * result in the subtraction and re-addition from a0 of a very
+    * large number, which means we'll end up loosing a lot of the
+    * fractional bits and precision from a0.  the way to fix this is
+    * to define a0 as the sample at a pixel center somewhere near vmin
+    * instead - i'll switch to this later.
+    */
+   tri->inputs.a0[slot][i] = (v1[vert_attr][i] -
+                              (dadx * (v1[0][0] - 0.5f) +
+                               dady * (v1[0][1] - 0.5f)));
  }
  
  
@@ -104,34 +99,29 @@ static void linear_coef( struct lp_rast_triangle *tri,
   * divide the interpolated value by the interpolated W at that fragment.
   */
  static void perspective_coef( struct lp_rast_triangle *tri,
+                              unsigned slot,
                               const float (*v1)[4],
                               const float (*v2)[4],
                               const float (*v3)[4],
                               unsigned vert_attr,
-                             unsigned i)
+                              unsigned i)
  {
-   unsigned i;
-
-   input *= 4;
-
-   for (i = 0; i < NUM_CHANNELS; i++) {
-      /* premultiply by 1/w  (v[0][3] is always 1/w):
-       */
-      float a1 = v1[vert_attr][i] * v1[0][3];
-      float a2 = v2[vert_attr][i] * v2[0][3];
-      float a3 = v3[vert_attr][i] * v3[0][3];
-      float da12 = a1 - a2;
-      float da31 = a3 - a1;
-      float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
-      float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
-
-
-      tri->inputs.dadx[input+i] = dadx;
-      tri->inputs.dady[input+i] = dady;
-      tri->inputs.a0[input+i] = (a1 -
-                           (dadx * (v1[0][0] - 0.5f) +
-                            dady * (v1[0][1] - 0.5f)));
-   }
+   /* premultiply by 1/w  (v[0][3] is always 1/w):
+    */
+   float a1 = v1[vert_attr][i] * v1[0][3];
+   float a2 = v2[vert_attr][i] * v2[0][3];
+   float a3 = v3[vert_attr][i] * v3[0][3];
+   float da12 = a1 - a2;
+   float da31 = a3 - a1;
+   float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
+   float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
+
+
+   tri->inputs.dadx[slot][i] = dadx;
+   tri->inputs.dady[slot][i] = dady;
+   tri->inputs.a0[slot][i] = (a1 -
+                              (dadx * (v1[0][0] - 0.5f) +
+                               dady * (v1[0][1] - 0.5f)));
  }
  
  
@@ -142,29 +132,37 @@ static void perspective_coef( struct lp_rast_triangle *tri,
   * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
   */
  static void
-setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot)
+setup_fragcoord_coef(struct lp_rast_triangle *tri,
+                     unsigned slot,
+                     const float (*v1)[4],
+                     const float (*v2)[4],
+                     const float (*v3)[4])
  {
-   slot *= 4;
-
     /*X*/
-   tri->inputs.a0[slot+0] = 0.0;
-   tri->inputs.dadx[slot+0] = 1.0;
-   tri->inputs.dady[slot+0] = 0.0;
+   tri->inputs.a0[slot][0] = 0.0;
+   tri->inputs.dadx[slot][0] = 1.0;
+   tri->inputs.dady[slot][0] = 0.0;
     /*Y*/
-   tri->inputs.a0[slot+1] = 0.0;
-   tri->inputs.dadx[slot+1] = 0.0;
-   tri->inputs.dady[slot+1] = 1.0;
+   tri->inputs.a0[slot][1] = 0.0;
+   tri->inputs.dadx[slot][1] = 0.0;
+   tri->inputs.dady[slot][1] = 1.0;
     /*Z*/
-   tri->inputs.a0[slot+2] = tri->inputs.a0[2];
-   tri->inputs.dadx[slot+2] = tri->inputs.dadx[2];
-   tri->inputs.dady[slot+2] = tri->inputs.dady[2];
+   linear_coef(tri, slot, v1, v2, v3, 0, 2);
     /*W*/
-   tri->inputs.a0[slot+3] = tri->inputs.a0[3];
-   tri->inputs.dadx[slot+3] = tri->inputs.dadx[3];
-   tri->inputs.dady[slot+3] = tri->inputs.dady[3];
+   linear_coef(tri, slot, v1, v2, v3, 0, 3);
  }
  
  
+static void setup_facing_coef( struct lp_rast_triangle *tri,
+                               unsigned slot,
+                               boolean frontface )
+{
+   constant_coef( tri, slot, 1.0f - frontface, 0 );
+   constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
+   constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
+   constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
+}
+
  
  /**
   * Compute the tri->coef[] array dadx, dady, a0 values.
@@ -176,40 +174,42 @@ static void setup_tri_coefficients( struct setup_context *setup,
                                     const float (*v3)[4],
                                     boolean frontface )
  {
-   unsigned input;
+   unsigned slot;
  
-   /* z and w are done by linear interpolation:
+   /* The internal position input is in slot zero:
      */
-   setup_fragcoord_coef(tri, 0);
-            linear_coef(tri, input, v1, v2, v3, vert_attr, i);
+   setup_fragcoord_coef(tri, 0, v1, v2, v3);
  
     /* setup interpolation for all the remaining attrbutes:
      */
-   for (input = 0; input < setup->fs.nr_inputs; input++) {
-      unsigned vert_attr = setup->fs.input[input].src_index;
+   for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+      unsigned vert_attr = setup->fs.input[slot].src_index;
        unsigned i;
  
-      switch (setup->fs.input[input].interp_mode) {
+      switch (setup->fs.input[slot].interp) {
        case LP_INTERP_CONSTANT:
-         constant_coef(tri, input, v3, vert_attr, i);
+         for (i = 0; i < NUM_CHANNELS; i++)
+            constant_coef(tri, slot+1, v3[vert_attr][i], i);
           break;
  
        case LP_INTERP_LINEAR:
-         linear_coef(tri, input, v1, v2, v3, vert_attr, i);
+         for (i = 0; i < NUM_CHANNELS; i++)
+            linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i);
           break;
  
        case LP_INTERP_PERSPECTIVE:
-            perspective_coef(tri, input, v1, v2, v3, vert_attr, i);
+         for (i = 0; i < NUM_CHANNELS; i++)
+            perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i);
           break;
  
-      case LP_INTERP_POS:
-         setup_fragcoord_coef(tri, input);
+      case LP_INTERP_POSITION:
+         /* XXX: fix me - duplicates the values in slot zero.
+          */
+         setup_fragcoord_coef(tri, slot+1, v1, v2, v3);
           break;
  
        case LP_INTERP_FACING:
-         tri->inputs.a0[input*4+0] = 1.0f - frontface;
-         tri->inputs.dadx[input*4+0] = 0.0;
-         tri->da[input].dady[0] = 0.0;
+         setup_facing_coef(tri, slot+1, frontface);
           break;
  
        default:
@@ -246,14 +246,14 @@ static inline float subpixel_snap( float a )
  #define MAX3(a,b,c) MAX2(MAX2(a,b),c)
  
  static void 
-do_triangle_ccw(struct lp_setup *setup,
+do_triangle_ccw(struct setup_context *setup,
                 const float (*v1)[4],
                 const float (*v2)[4],
                 const float (*v3)[4],
                 boolean frontfacing )
  {
-   const int rt_width = setup->framebuffer.cbufs[0]->width;
-   const int rt_height = setup->framebuffer.cbufs[0]->height;
+   const int rt_width = setup->fb.width;
+   const int rt_height = setup->fb.height;
  
     const float y1 = subpixel_snap(v1[0][1]);
     const float y2 = subpixel_snap(v2[0][1]);
@@ -263,7 +263,7 @@ do_triangle_ccw(struct lp_setup *setup,
     const float x2 = subpixel_snap(v2[0][0]);
     const float x3 = subpixel_snap(v3[0][0]);
     
-   struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri );
+   struct lp_setup_triangle *tri = get_data( setup, sizeof *tri );
     float area;
     float c1, c2, c3;
     int i;
author	Keith Whitwell <keithw@vmware.com>
	Fri, 9 Oct 2009 09:23:53 +0000 (10:23 +0100)
committer	Keith Whitwell <keithw@vmware.com>
	Fri, 9 Oct 2009 09:23:53 +0000 (10:23 +0100)