i965: Add support for correct GL_CLAMP behavior by clamping coordinates.

author Eric Anholt <eric@anholt.net>

Tue, 2 Nov 2010 16:11:17 +0000 (09:11 -0700)

committer Eric Anholt <eric@anholt.net>

Wed, 18 May 2011 20:57:17 +0000 (13:57 -0700)
author Eric Anholt <eric@anholt.net>
Tue, 2 Nov 2010 16:11:17 +0000 (09:11 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 18 May 2011 20:57:17 +0000 (13:57 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c

index 44ede60..bcfd678 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -325,42 +325,6 @@ static GLboolean check_fallbacks( struct brw_context *brw,
             return GL_TRUE;
     }
  
-   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
-    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
-    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and
-    * we want strict conformance, force the fallback.
-    * Right now, we only do this for 2D textures.
-    */
-   {
-      int u;
-      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
-         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
-
-         if (texUnit->Enabled) {
-           struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, u);
-
-            if (texUnit->Enabled & TEXTURE_1D_BIT) {
-               if (sampler->WrapS == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_2D_BIT) {
-               if (sampler->WrapS == GL_CLAMP ||
-                   sampler->WrapT == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_3D_BIT) {
-               if (sampler->WrapS == GL_CLAMP ||
-                   sampler->WrapT == GL_CLAMP ||
-                   sampler->WrapR == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-         }
-      }
-   }
-      
     /* Nothing stopping us from the fast path now */
     return GL_FALSE;
  }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 2157c93..1943ab6 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1172,7 +1172,8 @@ fs_visitor::visit(ir_assignment *ir)
  }
  
  fs_inst *
-fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler)
  {
     int mlen;
     int base_mrf = 1;
@@ -1184,7 +1185,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
  
     if (ir->shadow_comparitor) {
        for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-        emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+        fs_inst *inst = emit(BRW_OPCODE_MOV,
+                             fs_reg(MRF, base_mrf + mlen + i), coordinate);
+        if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+           inst->saturate = true;
+
          coordinate.reg_offset++;
        }
        /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@@ -1212,7 +1217,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
        mlen++;
     } else if (ir->op == ir_tex) {
        for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-        emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+        fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
+                             coordinate);
+        if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+           inst->saturate = true;
          coordinate.reg_offset++;
        }
        /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@@ -1226,7 +1234,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
        assert(ir->op == ir_txb || ir->op == ir_txl);
  
        for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-        emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), coordinate);
+        fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
+                                                    base_mrf + mlen + i * 2),
+                             coordinate);
+        if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+           inst->saturate = true;
          coordinate.reg_offset++;
        }
  
@@ -1298,15 +1310,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
   * surprising in the disassembly.
   */
  fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler)
  {
     int mlen = 1; /* g0 header always present. */
     int base_mrf = 1;
     int reg_width = c->dispatch_width / 8;
  
     for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width),
-          coordinate);
+      fs_inst *inst = emit(BRW_OPCODE_MOV,
+                          fs_reg(MRF, base_mrf + mlen + i * reg_width),
+                          coordinate);
+      if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+        inst->saturate = true;
        coordinate.reg_offset++;
     }
     mlen += ir->coordinate->type->vector_elements * reg_width;
@@ -1357,7 +1373,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
  }
  
  fs_inst *
-fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler)
  {
     int mlen = 1; /* g0 header always present. */
     int base_mrf = 1;
@@ -1391,8 +1408,10 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate)
  
     /* Set up the coordinate */
     for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
-          coordinate);
+      fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+                          coordinate);
+      if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+        inst->saturate = true;
        coordinate.reg_offset++;
        mlen += reg_width;
     }
@@ -1517,11 +1536,11 @@ fs_visitor::visit(ir_texture *ir)
     fs_reg dst = fs_reg(this, glsl_type::vec4_type);
  
     if (intel->gen >= 7) {
-      inst = emit_texture_gen7(ir, dst, coordinate);
+      inst = emit_texture_gen7(ir, dst, coordinate, sampler);
     } else if (intel->gen >= 5) {
-      inst = emit_texture_gen5(ir, dst, coordinate);
+      inst = emit_texture_gen5(ir, dst, coordinate, sampler);
     } else {
-      inst = emit_texture_gen4(ir, dst, coordinate);
+      inst = emit_texture_gen4(ir, dst, coordinate, sampler);
     }
  
     /* If there's an offset, we already set up m1.  To avoid the implied move,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index 1b37ef5..4b355c9 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -512,9 +512,12 @@ public:
     fs_reg *emit_general_interpolation(ir_variable *ir);
     void emit_interpolation_setup_gen4();
     void emit_interpolation_setup_gen6();
-   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
-   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
-   fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate);
+   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler);
+   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler);
+   fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler);
     fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
     fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
     bool try_emit_saturate(ir_expression *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h

index 11a8732..3aaa7c6 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -192,7 +192,7 @@ GLuint translate_tex_format(gl_format mesa_format,
                             GLenum srgb_decode);
  
  /* brw_wm_sampler_state.c */
-GLuint translate_wrap_mode(GLenum wrap);
+uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest);
  void upload_default_color(struct brw_context *brw,
                           struct gl_sampler_object *sampler,
                           int unit);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c

index 40589b0..9079762 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -410,6 +410,16 @@ static void brw_wm_populate_key( struct brw_context *brw,
                           swizzles[GET_SWZ(t->_Swizzle, 1)],
                           swizzles[GET_SWZ(t->_Swizzle, 2)],
                           swizzles[GET_SWZ(t->_Swizzle, 3)]);
+
+        if (sampler->MinFilter != GL_NEAREST &&
+            sampler->MagFilter != GL_NEAREST) {
+           if (sampler->WrapS == GL_CLAMP)
+              key->gl_clamp_mask[0] |= 1 << i;
+           if (sampler->WrapT == GL_CLAMP)
+              key->gl_clamp_mask[1] |= 1 << i;
+           if (sampler->WrapR == GL_CLAMP)
+              key->gl_clamp_mask[2] |= 1 << i;
+        }
        }
        else {
           key->tex_swizzles[i] = SWIZZLE_NOOP;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h

index a5f99a0..8ab531b 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -71,9 +71,9 @@ struct brw_wm_prog_key {
     GLuint shadowtex_mask:16;
     GLuint yuvtex_mask:16;
     GLuint yuvtex_swap_mask:16; /* UV swaped */
+   uint16_t gl_clamp_mask[3];
  
     GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
-
     GLushort drawable_height;
     GLbitfield64 vp_outputs_written;
     GLuint iz_lookup;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c

index fd4cd89..f61757a 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1100,11 +1100,16 @@ void emit_tex(struct brw_wm_compile *c,
  
     /* Emit the texcoords. */
     for (i = 0; i < nr_texcoords; i++) {
+      if (c->key.gl_clamp_mask[i] & (1 << sampler))
+        brw_set_saturate(p, true);
+
        if (emit & (1<<i))
          brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
        else
          brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
        cur_mrf += mrf_per_channel;
+
+      brw_set_saturate(p, false);
     }
  
     /* Fill in the shadow comparison reference value. */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c

index 918c1d6..5de39aa 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -44,20 +44,28 @@
  
  
  
-/* The brw (and related graphics cores) do not support GL_CLAMP.  The
- * Intel drivers for "other operating systems" implement GL_CLAMP as
- * GL_CLAMP_TO_EDGE, so the same is done here.
- */
-GLuint
-translate_wrap_mode(GLenum wrap)
+uint32_t
+translate_wrap_mode(GLenum wrap, bool using_nearest)
  {
     switch( wrap ) {
     case GL_REPEAT: 
        return BRW_TEXCOORDMODE_WRAP;
-   case GL_CLAMP:  
-      return BRW_TEXCOORDMODE_CLAMP;
+   case GL_CLAMP:
+      /* GL_CLAMP is the weird mode where coordinates are clamped to
+       * [0.0, 1.0], so linear filtering of coordinates outside of
+       * [0.0, 1.0] gives you half edge texel value and half border
+       * color.  The fragment shader will clamp the coordinates, and
+       * we set clamp_border here, which gets the result desired.  We
+       * just use clamp(_to_edge) for nearest, because for nearest
+       * clamping to 1.0 gives border color instead of the desired
+       * edge texels.
+       */
+      if (using_nearest)
+        return BRW_TEXCOORDMODE_CLAMP;
+      else
+        return BRW_TEXCOORDMODE_CLAMP_BORDER;
     case GL_CLAMP_TO_EDGE: 
-      return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+      return BRW_TEXCOORDMODE_CLAMP;
     case GL_CLAMP_TO_BORDER: 
        return BRW_TEXCOORDMODE_CLAMP_BORDER;
     case GL_MIRRORED_REPEAT: 
@@ -155,11 +163,13 @@ static void brw_update_sampler_state(struct brw_context *brw,
     struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
     struct gl_texture_object *texObj = texUnit->_Current;
     struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+   bool using_nearest = false;
  
     switch (gl_sampler->MinFilter) {
     case GL_NEAREST:
        sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
        sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      using_nearest = true;
        break;
     case GL_LINEAR:
        sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@@ -200,6 +210,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
        switch (gl_sampler->MagFilter) {
        case GL_NEAREST:
          sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+        using_nearest = true;
          break;
        case GL_LINEAR:
          sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@@ -209,9 +220,12 @@ static void brw_update_sampler_state(struct brw_context *brw,
        }  
     }
  
-   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
-   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
-   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+                                                 using_nearest);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+                                                 using_nearest);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+                                                 using_nearest);
  
     if (intel->gen >= 6 &&
         sampler->ss0.min_filter != sampler->ss0.mag_filter)
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c

index 8487a8f..95f6fbf 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -41,11 +41,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
     struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
     struct gl_texture_object *texObj = texUnit->_Current;
     struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+   bool using_nearest = false;
  
     switch (gl_sampler->MinFilter) {
     case GL_NEAREST:
        sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
        sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      using_nearest = true;
        break;
     case GL_LINEAR:
        sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@@ -85,6 +87,7 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
        switch (gl_sampler->MagFilter) {
        case GL_NEAREST:
          sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+        using_nearest = true;
          break;
        case GL_LINEAR:
          sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@@ -94,9 +97,12 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
        }
     }
  
-   sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
-   sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
-   sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
+   sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+                                                 using_nearest);
+   sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+                                                 using_nearest);
+   sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+                                                 using_nearest);
  
     /* Cube-maps on 965 and later must use the same wrap mode for all 3
      * coordinate dimensions.  Futher, only CUBE and CLAMP are valid.
author	Eric Anholt <eric@anholt.net>
	Tue, 2 Nov 2010 16:11:17 +0000 (09:11 -0700)
committer	Eric Anholt <eric@anholt.net>
	Wed, 18 May 2011 20:57:17 +0000 (13:57 -0700)
src/mesa/drivers/dri/i965/brw_draw.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_state.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_wm.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_wm.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_wm_emit.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_wm_sampler_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen7_sampler_state.c		patch \| blob \| history