From 82d25963a838cfebdeb9b080169979329ee850ea Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 20 Jun 2012 13:40:45 -0700 Subject: [PATCH] i965: Compute dFdy() correctly for FBOs. On i965, dFdx() and dFdy() are computed by taking advantage of the fact that each consecutive set of 4 pixels dispatched to the fragment shader always constitutes a contiguous 2x2 block of pixels in a fixed arrangement known as a "sub-span". So we calculate dFdx() by taking the difference between the values computed for the left and right halves of the sub-span, and we calculate dFdy() by taking the difference between the values computed for the top and bottom halves of the sub-span. However, there's a subtlety when FBOs are in use: since FBOs use a coordinate system where the origin is at the upper left, and window system framebuffers use a coordinate system where the origin is at the lower left, the computation of dFdy() needs to be negated for FBOs. This patch modifies the fragment shader back-ends to negate the value of dFdy() when an FBO is in use. It also modifies the code that populates the program key (brw_wm_populate_key() and brw_fs_precompile()) so that they always record in the program key whether we are rendering to an FBO or to a window system framebuffer; this ensures that the fragment shader will get recompiled when switching between FBO and non-FBO use. This will result in unnecessary recompiles of fragment shaders that don't use dFdy(). To fix that, we will need to adapt the GLSL and NV_fragment_program front-ends to record whether or not a given shader uses dFdy(). I plan to implement this in a future patch series; I've left FIXME comments in the code as a reminder. Fixes Piglit test "fbo-deriv". NOTE: This is a candidate for stable release branches. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 10 ++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 3 ++- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 14 +++++++++++--- src/mesa/drivers/dri/i965/brw_wm.c | 10 ++++++++++ src/mesa/drivers/dri/i965/brw_wm.h | 3 ++- src/mesa/drivers/dri/i965/brw_wm_emit.c | 15 +++++++++++---- 6 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 313e720..43efd68 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1848,6 +1848,13 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) struct brw_context *brw = brw_context(ctx); struct brw_wm_prog_key key; + /* As a temporary measure we assume that all programs use dFdy() (and hence + * need to be compiled differently depending on whether we're rendering to + * an FBO). FIXME: set this bool correctly based on the contents of the + * program. + */ + bool program_uses_dfdy = true; + if (!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) return true; @@ -1892,6 +1899,9 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) if (fp->Base.InputsRead & FRAG_BIT_WPOS) { key.drawable_height = ctx->DrawBuffer->Height; + } + + if ((fp->Base.InputsRead & FRAG_BIT_WPOS) || program_uses_dfdy) { key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0c6c3e4..2c2cb6c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -534,7 +534,8 @@ public: struct brw_reg src); void generate_discard(fs_inst *inst); void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); - void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); + void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + bool negate_value); void generate_spill(fs_inst *inst, struct brw_reg src); void generate_unspill(fs_inst *inst, struct brw_reg dst); void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 522123c..0881ad7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -441,8 +441,13 @@ fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) brw_ADD(p, dst, src0, negate(src1)); } +/* The negate_value boolean is used to negate the derivative computation for + * FBOs, since they place the origin at the upper left instead of the lower + * left. + */ void -fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) +fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + bool negate_value) { struct brw_reg src0 = brw_reg(src.file, src.nr, 0, BRW_REGISTER_TYPE_F, @@ -456,7 +461,10 @@ fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - brw_ADD(p, dst, src0, negate(src1)); + if (negate_value) + brw_ADD(p, dst, src1, negate(src0)); + else + brw_ADD(p, dst, src0, negate(src1)); } void @@ -902,7 +910,7 @@ fs_visitor::generate_code() generate_ddx(inst, dst, src[0]); break; case FS_OPCODE_DDY: - generate_ddy(inst, dst, src[0]); + generate_ddy(inst, dst, src[0], c->key.render_to_fbo); break; case FS_OPCODE_SPILL: diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index e919e5a..300e3bb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -420,6 +420,13 @@ static void brw_wm_populate_key( struct brw_context *brw, GLuint line_aa; GLuint i; + /* As a temporary measure we assume that all programs use dFdy() (and hence + * need to be compiled differently depending on whether we're rendering to + * an FBO). FIXME: set this bool correctly based on the contents of the + * program. + */ + bool program_uses_dfdy = true; + memset(key, 0, sizeof(*key)); /* Build the index for table lookup @@ -519,6 +526,9 @@ static void brw_wm_populate_key( struct brw_context *brw, */ if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { key->drawable_height = ctx->DrawBuffer->Height; + } + + if ((fp->program.Base.InputsRead & FRAG_BIT_WPOS) || program_uses_dfdy) { key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); } diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 8f1cb8c..2cde2a0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -346,7 +346,8 @@ void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, bool is_ddx, - const struct brw_reg *arg0); + const struct brw_reg *arg0, + bool negate_value); void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index e27ff35..1258efe 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -457,12 +457,16 @@ void emit_frontfacing(struct brw_compile *p, * between each other. We could probably do it like ddx and swizzle the right * order later, but bail for now and just produce * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) + * + * The negate_value boolean is used to negate the d/dy computation for FBOs, + * since they place the origin at the upper left instead of the lower left. */ void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, bool is_ddx, - const struct brw_reg *arg0) + const struct brw_reg *arg0, + bool negate_value) { int i; struct brw_reg src0, src1; @@ -498,7 +502,10 @@ void emit_ddxy(struct brw_compile *p, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); } - brw_ADD(p, dst[i], src0, negate(src1)); + if (negate_value) + brw_ADD(p, dst[i], src1, negate(src0)); + else + brw_ADD(p, dst[i], src0, negate(src1)); } } if (mask & SATURATE) @@ -1746,11 +1753,11 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_DDX: - emit_ddxy(p, dst, dst_flags, true, args[0]); + emit_ddxy(p, dst, dst_flags, true, args[0], false); break; case OPCODE_DDY: - emit_ddxy(p, dst, dst_flags, false, args[0]); + emit_ddxy(p, dst, dst_flags, false, args[0], c->key.render_to_fbo); break; case OPCODE_DP2: -- 2.7.4