r300g: fix some bugs with zbuffer compression (v4)
author Marek Olšák <maraeo@gmail.com>
Tue, 25 Jan 2011 04:37:52 +0000 (05:37 +0100)
committer Marek Olšák <maraeo@gmail.com>
Thu, 27 Jan 2011 17:12:01 +0000 (18:12 +0100)
This drops the memblock manager for ZMASK. Instead, only one zbuffer can be
compressed at a time. Note that this does not necessarily have to be slower.
When there is a large number of zbuffers, compression might be used more often
than it was before. It's also easier to debug.

How it works:
1) 'clear' turns the compression on.
2) If some other zbuffer is set or the currently-bound zbuffer is used
   for texturing, the driver decompresses it and then turns the compression off.

Notes:
- The ZMASK clear has been refactored, so that only one packet3 is used to clear
  ZMASK.
- The 8x8 compression mode is disabled. I couldn't make it work without issues.
- Also removed driver-specific stuff from u_blitter.

Driver status:
- RV530 and R580 appear to just work (finally).
- RV570 should work, but we may not be calculating the number of dwords to
  clear correctly, which would result in a partially uninitialized
  zbuffer.
- RS690 misrenders as if no ZMASK clear happened. No idea what's going on.
- RV350 may even hardlock. This issue was already present and this patch doesn't
  fix it.

I think we are still missing some hardware info we need to make the zbuffer
compression work fully.

Note that there is also an issue with HiZ, resulting in a sort of blocky
zigzagged corruption around some objects.

14 files changed:
src/gallium/auxiliary/util/u_blitter.c
src/gallium/auxiliary/util/u_blitter.h
src/gallium/auxiliary/util/u_inlines.h
src/gallium/drivers/r300/r300_blit.c
src/gallium/drivers/r300/r300_chipset.c
src/gallium/drivers/r300/r300_chipset.h
src/gallium/drivers/r300/r300_context.c
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/r300/r300_hyperz.c
src/gallium/drivers/r300/r300_hyperz.h
src/gallium/drivers/r300/r300_state.c
src/gallium/drivers/r300/r300_state_derived.c
src/gallium/drivers/r300/r300_texture.c

index e72f267..1f9d509 100644 (file)
@@ -86,7 +86,6 @@ struct blitter_context_priv
    void *dsa_write_depth_keep_stencil;
    void *dsa_keep_depth_stencil;
    void *dsa_keep_depth_write_stencil;
-   void *dsa_flush_depth_stencil;
 
    void *velem_state;
 
@@ -158,10 +157,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    ctx->dsa_keep_depth_stencil =
       pipe->create_depth_stencil_alpha_state(pipe, &dsa);
 
-   dsa.depth.writemask = 1;
-   ctx->dsa_flush_depth_stencil =
-      pipe->create_depth_stencil_alpha_state(pipe, &dsa);
-
    dsa.depth.enabled = 1;
    dsa.depth.writemask = 1;
    dsa.depth.func = PIPE_FUNC_ALWAYS;
@@ -247,7 +242,6 @@ void util_blitter_destroy(struct blitter_context *blitter)
                                           ctx->dsa_write_depth_keep_stencil);
    pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
    pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
-   pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
 
    pipe->delete_rasterizer_state(pipe, ctx->rs_state);
    pipe->delete_vs_state(pipe, ctx->vs);
@@ -665,12 +659,13 @@ static void blitter_draw_rectangle(struct blitter_context *blitter,
    blitter_draw_quad(ctx);
 }
 
-void util_blitter_clear(struct blitter_context *blitter,
-                        unsigned width, unsigned height,
-                        unsigned num_cbufs,
-                        unsigned clear_buffers,
-                        const float *rgba,
-                        double depth, unsigned stencil)
+static void util_blitter_clear_custom(struct blitter_context *blitter,
+                                      unsigned width, unsigned height,
+                                      unsigned num_cbufs,
+                                      unsigned clear_buffers,
+                                      const float *rgba,
+                                      double depth, unsigned stencil,
+                                      void *custom_blend, void *custom_dsa)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
@@ -681,26 +676,28 @@ void util_blitter_clear(struct blitter_context *blitter,
    blitter_check_saved_CSOs(ctx);
 
    /* bind CSOs */
-   if (clear_buffers & PIPE_CLEAR_COLOR)
+   if (custom_blend) {
+      pipe->bind_blend_state(pipe, custom_blend);
+   } else if (clear_buffers & PIPE_CLEAR_COLOR) {
       pipe->bind_blend_state(pipe, ctx->blend_write_color);
-   else
+   } else {
       pipe->bind_blend_state(pipe, ctx->blend_keep_color);
+   }
 
-   if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
-      sr.ref_value[0] = stencil & 0xff;
+   if (custom_dsa) {
+      pipe->bind_depth_stencil_alpha_state(pipe, custom_dsa);
+   } else if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
-      pipe->set_stencil_ref(pipe, &sr);
-   }
-   else if (clear_buffers & PIPE_CLEAR_DEPTH) {
+   } else if (clear_buffers & PIPE_CLEAR_DEPTH) {
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil);
-   }
-   else if (clear_buffers & PIPE_CLEAR_STENCIL) {
-      sr.ref_value[0] = stencil & 0xff;
+   } else if (clear_buffers & PIPE_CLEAR_STENCIL) {
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
-      pipe->set_stencil_ref(pipe, &sr);
-   }
-   else
+   } else {
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
+   }
+
+   sr.ref_value[0] = stencil & 0xff;
+   pipe->set_stencil_ref(pipe, &sr);
 
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
@@ -713,6 +710,27 @@ void util_blitter_clear(struct blitter_context *blitter,
    blitter_restore_CSOs(ctx);
 }
 
+void util_blitter_clear(struct blitter_context *blitter,
+                        unsigned width, unsigned height,
+                        unsigned num_cbufs,
+                        unsigned clear_buffers,
+                        const float *rgba,
+                        double depth, unsigned stencil)
+{
+   util_blitter_clear_custom(blitter, width, height, num_cbufs,
+                             clear_buffers, rgba, depth, stencil,
+                             NULL, NULL);
+}
+
+void util_blitter_clear_depth_custom(struct blitter_context *blitter,
+                                     unsigned width, unsigned height,
+                                     double depth, void *custom_dsa)
+{
+    const float rgba[4] = {0, 0, 0, 0};
+    util_blitter_clear_custom(blitter, width, height, 0,
+                              0, rgba, depth, 0, NULL, custom_dsa);
+}
+
 static
 boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2,
                    unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2)
@@ -1024,12 +1042,3 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
                            UTIL_BLITTER_ATTRIB_NONE, NULL);
    blitter_restore_CSOs(ctx);
 }
-
-/* flush a region of a depth stencil surface for r300g */
-void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
-                                      struct pipe_surface *dstsurf)
-{
-       struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
-       util_blitter_custom_depth_stencil(blitter, dstsurf, NULL,
-                                         ctx->dsa_flush_depth_stencil, 0.0f);
-}
index 922a858..47494c9 100644 (file)
@@ -55,13 +55,13 @@ struct blitter_context
     * \param y1      A Y coordinate of the top-left corner.
     * \param x2      An X coordinate of the bottom-right corner.
     * \param y2      A Y coordinate of the bottom-right corner.
-    * \param depth  A depth which the rectangle is rendered at.
+    * \param depth   A depth which the rectangle is rendered at.
     *
     * \param type   Semantics of the attributes "attrib".
     *               If type is UTIL_BLITTER_ATTRIB_NONE, ignore them.
     *               If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes
-    *               make up a constant RGBA color, and should go to the COLOR0
-    *               varying slot of a fragment shader.
+    *               make up a constant RGBA color, and should go
+    *               to the GENERIC0 varying slot of a fragment shader.
     *               If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and
     *               {a3, a4} specify top-left and bottom-right texture
     *               coordinates of the rectangle, respectively, and should go
@@ -141,6 +141,10 @@ void util_blitter_clear(struct blitter_context *blitter,
                         const float *rgba,
                         double depth, unsigned stencil);
 
+void util_blitter_clear_depth_custom(struct blitter_context *blitter,
+                                     unsigned width, unsigned height,
+                                     double depth, void *custom_dsa);
+
 /**
  * Copy a block of pixels from one surface to another.
  *
@@ -200,9 +204,6 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
                                       unsigned dstx, unsigned dsty,
                                       unsigned width, unsigned height);
 
-void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
-                                      struct pipe_surface *dstsurf);
-
 void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
                                       struct pipe_surface *zsurf,
                                       struct pipe_surface *cbsurf,
index b4870bc..fc4eb8f 100644 (file)
@@ -160,6 +160,21 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps,
    pipe_surface_reset(ctx, ps, pt, level, layer, flags);
 }
 
+/* Return true if the surfaces are equal. */
+static INLINE boolean
+pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2)
+{
+   return s1->texture == s2->texture &&
+          s1->format == s2->format &&
+          (s1->texture->target != PIPE_BUFFER ||
+           (s1->u.buf.first_element == s2->u.buf.first_element &&
+            s1->u.buf.last_element == s2->u.buf.last_element)) &&
+          (s1->texture->target == PIPE_BUFFER ||
+           (s1->u.tex.level == s2->u.tex.level &&
+            s1->u.tex.first_layer == s2->u.tex.first_layer &&
+            s1->u.tex.last_layer == s2->u.tex.last_layer));
+}
+
 /*
  * Convenience wrappers for screen buffer functions.
  */
index e195128..f24d558 100644 (file)
@@ -58,8 +58,9 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o
     util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count,
                                      r300->vertex_buffer);
 
-    if (op & (R300_CLEAR_SURFACE | R300_COPY))
+    if (op & (R300_CLEAR_SURFACE | R300_COPY)) {
         util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
+    }
 
     if (op & R300_COPY) {
         struct r300_textures_state* state =
@@ -108,6 +109,23 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
     return r300_surface(fb->cbufs[0])->cbzb_allowed;
 }
 
+static boolean r300_fast_zclear_allowed(struct r300_context *r300)
+{
+    struct pipe_framebuffer_state *fb =
+        (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+    /* Cannot decompress zmask with a 16-bit zbuffer.
+     * Also compression causes a hang. */
+    if (util_format_get_blocksizebits(fb->zsbuf->texture->format) == 16)
+        return FALSE;
+
+    /* Cannot use compression with a linear zbuffer. */
+    if (!r300_texture(fb->zsbuf->texture)->desc.microtile)
+        return FALSE;
+
+    return TRUE;
+}
+
 static uint32_t r300_depth_clear_value(enum pipe_format format,
                                        double depth, unsigned stencil)
 {
@@ -132,37 +150,46 @@ static void r300_clear(struct pipe_context* pipe,
                        double depth,
                        unsigned stencil)
 {
-    /* My notes about fastfill:
+    /* My notes about Zbuffer compression:
      *
-     * 1) Only the zbuffer is cleared.
+     * 1) The zbuffer must be micro-tiled and whole microtiles must be
+     *    written if compression is enabled. If microtiling is disabled,
+     *    it locks up.
      *
-     * 2) The zbuffer must be micro-tiled and whole microtiles must be
-     *    written. If microtiling is disabled, it locks up.
+     * 2) There is ZMASK RAM which contains a compressed zbuffer.
+     *    Each dword of the Z Mask contains compression information
+     *    for 16 4x4 pixel tiles, that is 2 bits for each tile.
+     *    On chips with 2 Z pipes, every other dword maps to a different
+     *    pipe. On newer chipsets, there is a new compression mode
+     *    with 8x8 pixel tiles per 2 bits.
      *
-     * 3) There is Z Mask RAM which contains a compressed zbuffer and
-     *    it interacts with fastfill. We should figure out how to use it
-     *    to get more performance.
-     *    This is what we know about the Z Mask:
+     * 3) The FASTFILL bit has nothing to do with filling. It only tells hw
+     *    it should look in the ZMASK RAM first before fetching from a real
+     *    zbuffer.
      *
-     *       Each dword of the Z Mask contains compression information
-     *       for 16 4x4 pixel blocks, that is 2 bits for each block.
-     *       On chips with 2 Z pipes, every other dword maps to a different
-     *       pipe.
+     * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned
+     *    during zbuffer reads instead of the value that is actually stored
+     *    in the zbuffer memory. A pixel is in a cleared state when its ZMASK
+     *    is equal to 0. Therefore, if you clear ZMASK with zeros, you may
+     *    leave the zbuffer memory uninitialized, but then you must enable
+     *    compression, so that the ZMASK RAM is actually used.
      *
-     * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must
-     *    be equal to 0. (clear the Z Mask RAM with zeros)
+     * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed
+     *    during zbuffer updates. A special decompressing operation should be
+     *    used to fully decompress a zbuffer, which basically just stores all
+     *    compressed tiles in ZMASK to the zbuffer memory.
      *
-     * 5) For 16-bit zbuffer, compression causes a hung with one or
+     * 6) For a 16-bit zbuffer, compression causes a hang with one or
      *    two samples and should not be used.
      *
-     * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
+     * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
      *    to avoid needless decompression.
      *
-     * 7) Fastfill must not be used if reading of compressed Z data is disabled
+     * 8) Fastfill must not be used if reading of compressed Z data is disabled
      *    and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
      *    i.e. it cannot be used to compress the zbuffer.
      *
-     * 8) ZB_CB_CLEAR does not interact with fastfill in any way.
+     * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way.
      *
      * - Marek
      */
@@ -179,25 +206,23 @@ static void r300_clear(struct pipe_context* pipe,
     boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
     uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
 
-    /* Decompress zbuffers that are bound as textures. If we didn't flush here,
-     * it would happen inside the blitter when updating derived state,
-     * causing a blitter operation to be called from inside the blitter,
-     * which would overwrite saved states and they would never get restored. */
-    r300_flush_depth_textures(r300);
-
     /* Enable fast Z clear.
      * The zbuffer must be in micro-tiled mode, otherwise it locks up. */
     if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
         hyperz_dcv = hyperz->zb_depthclearvalue =
             r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
 
-        r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG);
-        if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) {
+        if (r300_fast_zclear_allowed(r300)) {
             r300_mark_atom_dirty(r300, &r300->zmask_clear);
             buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
         }
+
         if (zstex->hiz_mem[fb->zsbuf->u.tex.level])
             r300_mark_atom_dirty(r300, &r300->hiz_clear);
+
+        /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state);
+         * once hiz offset is constant. */
+        r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
     }
 
     /* Enable CBZB clear. */
@@ -211,7 +236,7 @@ static void r300_clear(struct pipe_context* pipe,
         height = surf->cbzb_height;
 
         r300->cbzb_clear = TRUE;
-        r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+        r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
     }
 
     /* Clear. */
@@ -225,7 +250,7 @@ static void r300_clear(struct pipe_context* pipe,
                            buffers, rgba, depth, stencil);
         r300_blitter_end(r300);
     } else if (r300->zmask_clear.dirty) {
-        /* Just clear zmask and hiz now, this does not use a standard draw
+        /* Just clear zmask and hiz now, this does not use the standard draw
          * procedure. */
         unsigned dwords;
 
@@ -257,16 +282,15 @@ static void r300_clear(struct pipe_context* pipe,
     if (r300->cbzb_clear) {
         r300->cbzb_clear = FALSE;
         hyperz->zb_depthclearvalue = hyperz_dcv;
-        r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+        r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
     }
 
     /* Enable fastfill and/or hiz.
      *
      * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
      * looks if zmask/hiz is in use and enables fastfill accordingly. */
-    if (zstex &&
-        (zstex->zmask_in_use[fb->zsbuf->u.tex.level] ||
-         zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
+    if (r300->zmask_in_use ||
+        (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
         r300_mark_atom_dirty(r300, &r300->hyperz_state);
     }
 }
@@ -280,16 +304,16 @@ static void r300_clear_render_target(struct pipe_context *pipe,
 {
     struct r300_context *r300 = r300_context(pipe);
 
-    /* Decompress zbuffers that are bound as textures. If we didn't flush here,
-     * it would happen inside the blitter when updating derived state,
-     * causing a blitter operation to be called from inside the blitter,
-     * which would overwrite saved states and they would never get restored. */
-    r300_flush_depth_textures(r300);
+    r300->zmask_locked = TRUE;
+    r300_mark_atom_dirty(r300, &r300->hyperz_state);
 
     r300_blitter_begin(r300, R300_CLEAR_SURFACE);
     util_blitter_clear_render_target(r300->blitter, dst, rgba,
                                      dstx, dsty, width, height);
     r300_blitter_end(r300);
+
+    r300->zmask_locked = FALSE;
+    r300_mark_atom_dirty(r300, &r300->hyperz_state);
 }
 
 /* Clear a region of a depth stencil surface. */
@@ -302,83 +326,70 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
                                      unsigned width, unsigned height)
 {
     struct r300_context *r300 = r300_context(pipe);
+    struct pipe_framebuffer_state *fb =
+        (struct pipe_framebuffer_state*)r300->fb_state.state;
 
-    /* Decompress zbuffers that are bound as textures. If we didn't flush here,
-     * it would happen inside the blitter when updating derived state,
-     * causing a blitter operation to be called from inside the blitter,
-     * which would overwrite saved states and they would never get restored. */
-    r300_flush_depth_textures(r300);
+    if (r300->zmask_in_use && !r300->zmask_locked) {
+        if (fb->zsbuf->texture == dst->texture) {
+            r300_decompress_zmask(r300);
+        } else {
+            r300->zmask_locked = TRUE;
+            r300_mark_atom_dirty(r300, &r300->hyperz_state);
+        }
+    }
 
     r300_blitter_begin(r300, R300_CLEAR_SURFACE);
     util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil,
                                      dstx, dsty, width, height);
     r300_blitter_end(r300);
+
+    if (r300->zmask_locked) {
+        r300->zmask_locked = FALSE;
+        r300_mark_atom_dirty(r300, &r300->hyperz_state);
+    }
 }
 
-/* Flush a depth stencil buffer. */
-static void r300_flush_depth_stencil(struct pipe_context *pipe,
-                                     struct pipe_resource *dst,
-                                     unsigned level,
-                                     unsigned layer)
+void r300_decompress_zmask(struct r300_context *r300)
 {
-    struct r300_context *r300 = r300_context(pipe);
-    struct pipe_surface *dstsurf, surf_tmpl;
-    struct r300_texture *tex = r300_texture(dst);
+    struct pipe_framebuffer_state *fb =
+        (struct pipe_framebuffer_state*)r300->fb_state.state;
 
-    if (!tex->zmask_mem[level])
-        return;
-    if (!tex->zmask_in_use[level])
+    if (!r300->zmask_in_use || r300->zmask_locked)
         return;
 
-    surf_tmpl.format = dst->format;
-    surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
-    surf_tmpl.u.tex.level = level;
-    surf_tmpl.u.tex.first_layer = layer;
-    surf_tmpl.u.tex.last_layer = layer;
-    dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl);
-
-    r300->z_decomp_rd = TRUE;
+    r300->zmask_decompress = TRUE;
+    r300_mark_atom_dirty(r300, &r300->hyperz_state);
 
-    r300_blitter_begin(r300, R300_CLEAR_SURFACE);
-    util_blitter_flush_depth_stencil(r300->blitter, dstsurf);
+    r300_blitter_begin(r300, R300_CLEAR);
+    util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0,
+                                    r300->dsa_decompress_zmask);
     r300_blitter_end(r300);
-    r300->z_decomp_rd = FALSE;
 
-    tex->zmask_in_use[level] = FALSE;
-    pipe_surface_reference(&dstsurf, NULL);
+    r300->zmask_decompress = FALSE;
+    r300->zmask_in_use = FALSE;
+    r300_mark_atom_dirty(r300, &r300->hyperz_state);
 }
 
-/* We can't use compressed zbuffers as samplers. */
-void r300_flush_depth_textures(struct r300_context *r300)
+void r300_decompress_zmask_locked_unsafe(struct r300_context *r300)
 {
-    struct r300_textures_state *state =
-        (struct r300_textures_state*)r300->textures_state.state;
-    unsigned i, level;
-    unsigned count = MIN2(state->sampler_view_count,
-                          state->sampler_state_count);
-
-    if (r300->z_decomp_rd)
-        return;
-
-    for (i = 0; i < count; i++)
-        if (state->sampler_views[i] && state->sampler_states[i]) {
-            struct pipe_resource *tex = state->sampler_views[i]->base.texture;
-
-            if (tex->target == PIPE_TEXTURE_3D ||
-                tex->target == PIPE_TEXTURE_CUBE)
-                continue;
+    struct pipe_framebuffer_state fb = {0};
+    fb.width = r300->locked_zbuffer->width;
+    fb.height = r300->locked_zbuffer->height;
+    fb.nr_cbufs = 0;
+    fb.zsbuf = r300->locked_zbuffer;
+
+    r300->context.set_framebuffer_state(&r300->context, &fb);
+    r300_decompress_zmask(r300);
+}
 
-            /* Ignore non-depth textures.
-             * Also ignore reinterpreted depth textures, e.g. resource_copy. */
-            if (!util_format_is_depth_or_stencil(tex->format))
-                continue;
+void r300_decompress_zmask_locked(struct r300_context *r300)
+{
+    struct pipe_framebuffer_state saved_fb = {0};
 
-            for (level = 0; level <= tex->last_level; level++)
-                if (r300_texture(tex)->zmask_in_use[level]) {
-                    /* We don't handle 3D textures and cubemaps yet. */
-                    r300_flush_depth_stencil(&r300->context, tex, level, 0);
-                 }
-        }
+    util_copy_framebuffer_state(&saved_fb, r300->fb_state.state);
+    r300_decompress_zmask_locked_unsafe(r300);
+    r300->context.set_framebuffer_state(&r300->context, &saved_fb);
+    util_unreference_framebuffer_state(&saved_fb);
 }
 
 /* Copy a block of pixels from one surface to another using HW. */
@@ -393,8 +404,6 @@ static void r300_hw_copy_region(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
 
     r300_blitter_begin(r300, R300_COPY);
-
-    /* Do a copy */
     util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz,
                              src, src_level, src_box, TRUE);
     r300_blitter_end(r300);
@@ -409,10 +418,22 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
                                       unsigned src_level,
                                       const struct pipe_box *src_box)
 {
+    struct r300_context *r300 = r300_context(pipe);
+    struct pipe_framebuffer_state *fb =
+        (struct pipe_framebuffer_state*)r300->fb_state.state;
     enum pipe_format old_format = dst->format;
     enum pipe_format new_format = old_format;
     const struct util_format_description *desc = util_format_description(old_format);
-    boolean is_depth;
+
+    if (r300->zmask_in_use && !r300->zmask_locked) {
+        if (fb->zsbuf->texture == src ||
+            fb->zsbuf->texture == dst) {
+            r300_decompress_zmask(r300);
+        } else {
+            r300->zmask_locked = TRUE;
+            r300_mark_atom_dirty(r300, &r300->hyperz_state);
+        }
+    }
 
     if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
         (!pipe->screen->is_format_supported(pipe->screen,
@@ -441,11 +462,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
         }
     }
 
-    is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
-    if (is_depth) {
-        r300_flush_depth_stencil(pipe, src, src_level, src_box->z);
-    }
-
     if (old_format != new_format) {
         r300_texture_reinterpret_format(pipe->screen,
                                         dst, new_format);
@@ -462,6 +478,11 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
         r300_texture_reinterpret_format(pipe->screen,
                                         src, old_format);
     }
+
+    if (r300->zmask_locked) {
+        r300->zmask_locked = FALSE;
+        r300_mark_atom_dirty(r300, &r300->hyperz_state);
+    }
 }
 
 void r300_init_blit_functions(struct r300_context *r300)
index 2b183f6..15dc6d0 100644 (file)
@@ -424,5 +424,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
     }
 
     caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350;
+    /* XXX The 8x8 compression mode doesn't always work (piglit/fbo-depth fails). */
+    caps->z_compress = /*caps->is_rv350 ? R300_ZCOMP_8X8 :*/ R300_ZCOMP_4X4;
     caps->dxtc_swizzle = caps->is_r400 || caps->is_r500;
 }
index f2035d2..0be161f 100644 (file)
 #define PIPE_ZMASK_SIZE 4096
 #define RV3xx_ZMASK_SIZE 5120
 
+/* The size of a compressed tile. Each compressed tile takes 2 bits
+ * in the ZMASK RAM, so there are always 16 tiles per dword. */
+enum r300_zmask_compression {
+   R300_ZCOMP_4X4 = 4,
+   R300_ZCOMP_8X8 = 8
+};
+
 /* Structure containing all the possible information about a specific Radeon
  * in the R3xx, R4xx, and R5xx families. */
 struct r300_capabilities {
@@ -50,10 +57,12 @@ struct r300_capabilities {
     unsigned num_tex_units;
     /* Whether or not TCL is physically present */
     boolean has_tcl;
+    /* Some chipsets do not have HiZ RAM - others have varying amounts. */
+    /* Some chipsets do not have HiZ RAM - other have varying amounts. */
     int hiz_ram;
-    /*  some chipsets have zmask ram per pipe some don't */
+    /* Some chipsets have ZMASK RAM per pipe, some don't. */
     int zmask_ram;
+    /* Compression mode for ZMASK. */
+    enum r300_zmask_compression z_compress;
     /* Whether or not this is RV350 or newer, including all r400 and r500
      * chipsets. The differences compared to the oldest r300 chips are:
      * - Blend LTE/GTE thresholds
index e265bdb..552df2b 100644 (file)
@@ -94,6 +94,9 @@ static void r300_release_referenced_objects(struct r300_context *r300)
         remove_from_list(query);
         FREE(query);
     }
+
+    r300->context.delete_depth_stencil_alpha_state(&r300->context,
+                                                   r300->dsa_decompress_zmask);
 }
 
 static void r300_destroy_context(struct pipe_context* context)
@@ -116,9 +119,6 @@ static void r300_destroy_context(struct pipe_context* context)
     /* XXX: This function assumes r300->query_list was initialized */
     r300_release_referenced_objects(r300);
 
-    if (r300->zmask_mm)
-        r300_hyperz_destroy_mm(r300);
-
     if (r300->cs)
         r300->rws->cs_destroy(r300->cs);
 
@@ -238,7 +238,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
         if (has_hiz_ram)
             R300_INIT_ATOM(hiz_clear, 0);
         /* zmask clear */
-        R300_INIT_ATOM(zmask_clear, 0);
+        R300_INIT_ATOM(zmask_clear, 4);
     }
     /* ZB (unpipelined), SU. */
     R300_INIT_ATOM(query_start, 4);
@@ -513,6 +513,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
         r300->dummy_vb = screen->resource_create(screen, &vb);
     }
 
+    {
+        struct pipe_depth_stencil_alpha_state dsa = {};
+        dsa.depth.writemask = 1;
+
+        r300->dsa_decompress_zmask =
+            r300->context.create_depth_stencil_alpha_state(&r300->context,
+                                                           &dsa);
+    }
+
     return &r300->context;
 
  fail:
index 9030f1b..6e96ae8 100644 (file)
@@ -406,8 +406,6 @@ struct r300_texture {
 
     /* hyper-z memory allocs */
     struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS];
-    struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS];
-    boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS];
     boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS];
 
     /* This is the level tiling flags were last time set for.
@@ -589,15 +587,21 @@ struct r300_context {
     boolean two_sided_color;
     /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
     boolean incompatible_vb_layout;
-#define R300_Z_COMPRESS_44 1
-#define RV350_Z_COMPRESS_88 2
-    int z_compression;
+
     boolean cbzb_clear;
-    boolean z_decomp_rd;
+    /* Whether ZMASK is enabled. */
+    boolean zmask_in_use;
+    /* Whether ZMASK is being decompressed. */
+    boolean zmask_decompress;
+    /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */
+    boolean zmask_locked;
+    /* The zbuffer the ZMASK of which is locked. */
+    struct pipe_surface *locked_zbuffer;
+
+    void *dsa_decompress_zmask;
 
     /* two mem block managers for hiz/zmask ram space */
     struct mem_block *hiz_mm;
-    struct mem_block *zmask_mm;
 
     /* upload managers */
     struct u_upload_mgr *upload_vb;
@@ -687,7 +691,9 @@ void r300_init_state_functions(struct r300_context* r300);
 void r300_init_resource_functions(struct r300_context* r300);
 
 /* r300_blit.c */
-void r300_flush_depth_textures(struct r300_context *r300);
+void r300_decompress_zmask(struct r300_context *r300);
+void r300_decompress_zmask_locked_unsafe(struct r300_context *r300);
+void r300_decompress_zmask_locked(struct r300_context *r300);
 
 /* r300_query.c */
 void r300_resume_query(struct r300_context *r300,
@@ -713,8 +719,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias);
 /* r300_state.c */
 enum r300_fb_state_change {
     R300_CHANGED_FB_STATE = 0,
-    R300_CHANGED_CBZB_FLAG,
-    R300_CHANGED_ZCLEAR_FLAG,
+    R300_CHANGED_HYPERZ_FLAG,
     R300_CHANGED_MULTIWRITE
 };
 
index d14cdcb..54e2634 100644 (file)
@@ -433,6 +433,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
             tex = r300_texture(surf->base.texture);
 
             surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK;
+
             /* HiZ RAM. */
             if (r300->screen->caps.hiz_ram) {
                 if (tex->hiz_mem[level]) {
@@ -443,14 +444,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
                     OUT_CS_REG(R300_ZB_HIZ_PITCH, 0);
                 }
             }
+
             /* Z Mask RAM. (compressed zbuffer) */
-            if (tex->zmask_mem[level]) {
-                OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2);
-                OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
-            } else {
-                OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
-                OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0);
-            }
+            OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
+            OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
         }
     }
 
@@ -462,6 +459,7 @@ void r300_emit_hyperz_state(struct r300_context *r300,
 {
     struct r300_hyperz_state *z = state;
     CS_LOCALS(r300);
+
     if (z->flush)
         WRITE_CS_TABLE(&z->cb_flush_begin, size);
     else
@@ -1097,17 +1095,6 @@ static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint1
     END_CS;
 }
 
-static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val)
-{
-    CS_LOCALS(r300);
-    BEGIN_CS(4);
-    OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
-    OUT_CS(start);
-    OUT_CS(count);
-    OUT_CS(val);
-    END_CS;
-}
-
 #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
 
 void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
@@ -1153,42 +1140,49 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
 {
     struct pipe_framebuffer_state *fb =
         (struct pipe_framebuffer_state*)r300->fb_state.state;
-    struct r300_screen* r300screen = r300->screen;
-    uint32_t stride, offset = 0;
-    struct r300_texture* tex;
-    uint32_t i, height;
-    int mult, offset_shift;
-
-    tex = r300_texture(fb->zsbuf->texture);
-    stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level];
-
-    offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs;
+    struct r300_texture *tex;
+    unsigned numdw, pipes;
+    unsigned compsize = r300->screen->caps.z_compress;
+    /* The zbuffer area (in pixels) covered by 1 ZMASK DWORD is:
+     *
+     * GPU    Pipes    4x4 mode   8x8 mode
+     * ------------------------------------------
+     * R580   4P/1Z    32x32      64x64
+     * RV570  3P/1Z    48x16      96x32
+     * RV530  1P/2Z    32x16      64x32
+     */
+    static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8};
+    static unsigned num_blocks_y_per_dw[4] = {4, 4,  4, 8};
+    CS_LOCALS(r300);
 
-    if (r300->z_compression == RV350_Z_COMPRESS_88)
-        mult = 8;
-    else
-        mult = 4;
+    if (r300->screen->caps.family == CHIP_FAMILY_RV530) {
+        pipes = r300->screen->caps.num_z_pipes;
+    } else {
+        pipes = r300->screen->caps.num_frag_pipes;
+    }
 
-    height = ALIGN_DIVUP(fb->zsbuf->height, mult);
+    tex = r300_texture(fb->zsbuf->texture);
 
-    offset_shift = 4;
-    offset_shift += (r300screen->caps.num_frag_pipes / 2);
-    stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes);
+    /* Get the zbuffer size (with the aligned width and height). */
+    numdw = align(tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level],
+                  num_blocks_x_per_dw[pipes-1] * compsize) *
+            align(fb->zsbuf->height,
+                  num_blocks_y_per_dw[pipes-1] * compsize);
 
-    /* okay have width in pixels - divide by block width */
-    stride = ALIGN_DIVUP(stride, mult);
-    /* have width in blocks - divide by number of fragment pipes screen width */
-    /* 16 blocks per dword */
-    stride = ALIGN_DIVUP(stride, 16);
+    /* Convert pixels -> dwords. */
+    numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize *
+                               num_blocks_y_per_dw[pipes-1] * compsize);
 
-    for (i = 0; i < height; i++) {
-        offset = i * stride;
-        offset <<= offset_shift;
-        r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff);
-    }
+    BEGIN_CS(size);
+    OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
+    OUT_CS(0);
+    OUT_CS(numdw);
+    OUT_CS(0);
+    END_CS;
 
     /* Mark the current zbuffer's zmask as in use. */
-    tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE;
+    r300->zmask_in_use = TRUE;
+    r300_mark_atom_dirty(r300, &r300->hyperz_state);
 }
 
 void r300_emit_ztop_state(struct r300_context* r300,
index c22e307..d996d19 100644 (file)
@@ -127,7 +127,7 @@ static boolean r300_can_hiz(struct r300_context *r300)
                 z->current_func, dsa_state->z_stencil_control);
             return FALSE;
         }
-    }    
+    }
     return TRUE;
 }
 
@@ -139,7 +139,6 @@ static void r300_update_hyperz(struct r300_context* r300)
         (struct pipe_framebuffer_state*)r300->fb_state.state;
     struct r300_texture *zstex =
             fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
-    boolean zmask_in_use = FALSE;
     boolean hiz_in_use = FALSE;
 
     z->gb_z_peq_config = 0;
@@ -158,42 +157,40 @@ static void r300_update_hyperz(struct r300_context* r300)
     if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
         return;
 
-    zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level];
     hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level];
 
-    /* Z fastfill. */
-    if (zmask_in_use) {
-        z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /*  | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/
-    }
-
     /* Zbuffer compression. */
-    if (zmask_in_use && r300->z_compression) {
-        z->zb_bw_cntl |= R300_RD_COMP_ENABLE;
-        if (r300->z_decomp_rd == false)
+    if (r300->zmask_in_use && !r300->zmask_locked) {
+        z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
+                         /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/
+                         R300_RD_COMP_ENABLE;
+
+        if (!r300->zmask_decompress) {
             z->zb_bw_cntl |= R300_WR_COMP_ENABLE;
+        }
+
+        if (r300->screen->caps.z_compress == R300_ZCOMP_8X8) {
+            z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
+        }
     }
-    /* RV350 and up optimizations. */
-    /* The section 10.4.9 in the docs is a lie. */
-    if (r300->z_compression == RV350_Z_COMPRESS_88)
-        z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
-
-    if (hiz_in_use) {
-        bool can_hiz = r300_can_hiz(r300);
-        if (can_hiz) {
-            z->zb_bw_cntl |= R300_HIZ_ENABLE;
-            z->sc_hyperz |= R300_SC_HYPERZ_ENABLE;
-            z->sc_hyperz |= r300_get_sc_hz_max(r300);
-            z->zb_bw_cntl |= r300_get_hiz_min(r300);
+
+    if (hiz_in_use && r300_can_hiz(r300)) {
+        z->zb_bw_cntl |= R300_HIZ_ENABLE |
+                         r300_get_hiz_min(r300);
+
+        z->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
+                        r300_get_sc_hz_max(r300);
+
+        if (r300->screen->caps.is_r500) {
+            z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 |
+                             R500_HIZ_EQUAL_REJECT_ENABLE;
         }
     }
 
     /* R500-specific features and optimizations. */
     if (r300->screen->caps.is_r500) {
-        z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3;
-        z->zb_bw_cntl |=
-                R500_HIZ_EQUAL_REJECT_ENABLE |
-                R500_PEQ_PACKING_ENABLE |
-                R500_COVERED_PTR_MASKING_ENABLE;
+        z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE |
+                         R500_COVERED_PTR_MASKING_ENABLE;
     }
 }
 
@@ -297,26 +294,10 @@ static void r300_update_hiz_clear(struct r300_context *r300)
     r300->hiz_clear.size = height * 4;
 }
 
-static void r300_update_zmask_clear(struct r300_context *r300)
-{
-    struct pipe_framebuffer_state *fb =
-        (struct pipe_framebuffer_state*)r300->fb_state.state;
-    uint32_t height;
-    int mult;
-
-    if (r300->z_compression == RV350_Z_COMPRESS_88)
-        mult = 8;
-    else
-        mult = 4;
-
-    height = ALIGN_DIVUP(fb->zsbuf->height, mult);
-
-    r300->zmask_clear.size = height * 4;
-}
-
 void r300_update_hyperz_state(struct r300_context* r300)
 {
     r300_update_ztop(r300);
+
     if (r300->hyperz_state.dirty) {
         r300_update_hyperz(r300);
     }
@@ -324,9 +305,6 @@ void r300_update_hyperz_state(struct r300_context* r300)
     if (r300->hiz_clear.dirty) {
        r300_update_hiz_clear(r300);
     }
-    if (r300->zmask_clear.dirty) {
-       r300_update_zmask_clear(r300);
-    }
 }
 
 void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
@@ -345,43 +323,6 @@ void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
     ndw = ALIGN_DIVUP(zsize, 64);
 
     tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0);
-    return;
-}
-
-void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress)
-{
-    int bsize = 256;
-    uint32_t zsize, ndw;
-    int level = surf->base.u.tex.level;
-    struct r300_texture *tex;
-
-    tex = r300_texture(surf->base.texture);
-
-    /* We currently don't handle decompression for 3D textures and cubemaps
-     * correctly. */
-    if (tex->desc.b.b.target != PIPE_TEXTURE_1D &&
-        tex->desc.b.b.target != PIPE_TEXTURE_2D &&
-        tex->desc.b.b.target != PIPE_TEXTURE_RECT)
-        return;
-
-    /* Cannot flush zmask of 16-bit zbuffers. */
-    if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16)
-        return;
-
-    if (tex->zmask_mem[level])
-        return;
-
-    zsize = tex->desc.layer_size_in_bytes[level];
-    zsize /= util_format_get_blocksize(tex->desc.b.b.format);
-
-    /* each zmask dword represents 16 4x4 blocks - which is 256 pixels
-       or 16 8x8 depending on the gb peq flag = 1024 pixels */
-    if (compress == RV350_Z_COMPRESS_88)
-        bsize = 1024;
-
-    ndw = ALIGN_DIVUP(zsize, bsize);
-    tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0);
-    return;
 }
 
 boolean r300_hyperz_init_mm(struct r300_context *r300)
@@ -389,15 +330,9 @@ boolean r300_hyperz_init_mm(struct r300_context *r300)
     struct r300_screen* r300screen = r300->screen;
     int frag_pipes = r300screen->caps.num_frag_pipes;
 
-    r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
-    if (!r300->zmask_mm)
-      return FALSE;
-
     if (r300screen->caps.hiz_ram) {
       r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
       if (!r300->hiz_mm) {
-        u_mmDestroy(r300->zmask_mm);
-        r300->zmask_mm = NULL;
         return FALSE;
       }
     }
@@ -413,7 +348,4 @@ void r300_hyperz_destroy_mm(struct r300_context *r300)
       u_mmDestroy(r300->hiz_mm);
       r300->hiz_mm = NULL;
     }
-
-    u_mmDestroy(r300->zmask_mm);
-    r300->zmask_mm = NULL;
 }
index 30a23ec..d4c8e7c 100644 (file)
@@ -28,8 +28,8 @@ struct r300_context;
 void r300_update_hyperz_state(struct r300_context* r300);
 
 void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
-void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress);
 
 boolean r300_hyperz_init_mm(struct r300_context *r300);
 void r300_hyperz_destroy_mm(struct r300_context *r300);
+
 #endif
index 2664c1d..ba456d4 100644 (file)
@@ -694,8 +694,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
     }
 
     if (change == R300_CHANGED_FB_STATE ||
-        change == R300_CHANGED_CBZB_FLAG ||
-        change == R300_CHANGED_ZCLEAR_FLAG) {
+        change == R300_CHANGED_HYPERZ_FLAG) {
         r300_mark_atom_dirty(r300, &r300->hyperz_state);
     }
 
@@ -719,8 +718,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
 }
 
 static void
-    r300_set_framebuffer_state(struct pipe_context* pipe,
-                               const struct pipe_framebuffer_state* state)
+r300_set_framebuffer_state(struct pipe_context* pipe,
+                           const struct pipe_framebuffer_state* state)
 {
     struct r300_context* r300 = r300_context(pipe);
     struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
@@ -728,7 +727,6 @@ static void
     boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
     unsigned max_width, max_height, i;
     uint32_t zbuffer_bpp = 0;
-    int blocksize;
 
     if (r300->screen->caps.is_r500) {
         max_width = max_height = 4096;
@@ -744,6 +742,32 @@ static void
         return;
     }
 
+    if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) {
+        /* There is a zmask in use, what are we gonna do? */
+        if (state->zsbuf) {
+            if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
+                /* Decompress the currently bound zbuffer before we bind another one. */
+                r300_decompress_zmask(r300);
+            }
+        } else {
+            /* We don't bind another zbuffer, so lock the current one. */
+            r300->zmask_locked = TRUE;
+            pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
+        }
+    } else if (r300->zmask_locked && r300->locked_zbuffer) {
+        /* We have a locked zbuffer now, what are we gonna do? */
+        if (state->zsbuf) {
+            if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
+                /* We are binding some other zbuffer, so decompress the locked
+                 * one first; it gets unlocked automatically. */
+                r300_decompress_zmask_locked_unsafe(r300);
+            } else {
+                /* We are binding the locked zbuffer again, so unlock it. */
+                r300->zmask_locked = FALSE;
+            }
+        }
+    }
+
     /* If nr_cbufs is changed from zero to non-zero or vice versa... */
     if (!!old_state->nr_cbufs != !!state->nr_cbufs) {
         r300_mark_atom_dirty(r300, &r300->blend_state);
@@ -758,14 +782,15 @@ static void
 
     util_copy_framebuffer_state(r300->fb_state.state, state);
 
+    if (!r300->zmask_locked) {
+        pipe_surface_reference(&r300->locked_zbuffer, NULL);
+    }
+
     r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
     r300->validate_buffers = TRUE;
 
-    r300->z_compression = false;
-    
     if (state->zsbuf) {
-        blocksize = util_format_get_blocksize(state->zsbuf->texture->format);
-        switch (blocksize) {
+        switch (util_format_get_blocksize(state->zsbuf->texture->format)) {
         case 2:
             zbuffer_bpp = 16;
             break;
@@ -773,30 +798,19 @@ static void
             zbuffer_bpp = 24;
             break;
         }
+
+        /* Setup Hyper-Z. */
         if (can_hyperz) {
             struct r300_surface *zs_surf = r300_surface(state->zsbuf);
-            struct r300_texture *tex;
-            int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44;
+            struct r300_texture *tex = r300_texture(zs_surf->base.texture);
             int level = zs_surf->base.u.tex.level;
 
-            tex = r300_texture(zs_surf->base.texture);
-
             /* work out whether we can support hiz on this buffer */
             r300_hiz_alloc_block(r300, zs_surf);
-        
-            /* work out whether we can support zmask features on this buffer */
-            r300_zmask_alloc_block(r300, zs_surf, compress);
-
-            if (tex->zmask_mem[level]) {
-                /* compression causes hangs on 16-bit */
-                if (zbuffer_bpp == 24)
-                    r300->z_compression = compress;
-            }
+
             DBG(r300, DBG_HYPERZ,
-                "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
-                tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef,
-                r300->z_compression, tex->zmask_mem[level] ? 1 : 0,
-                tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef);
+                "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
+                tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef);
         }
 
         /* Polygon offset depends on the zbuffer bit depth. */
index 95be784..de4c271 100644 (file)
@@ -862,11 +862,35 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
     }
 }
 
-void r300_update_derived_state(struct r300_context* r300)
+static void r300_decompress_depth_textures(struct r300_context *r300)
 {
-    r300_flush_depth_textures(r300);
+    struct r300_textures_state *state =
+        (struct r300_textures_state*)r300->textures_state.state;
+    struct pipe_resource *tex;
+    unsigned count = MIN2(state->sampler_view_count,
+                          state->sampler_state_count);
+    unsigned i;
+
+    if (!r300->zmask_locked || !r300->locked_zbuffer) {
+        return;
+    }
+
+    for (i = 0; i < count; i++) {
+        if (state->sampler_views[i] && state->sampler_states[i]) {
+            tex = state->sampler_views[i]->base.texture;
 
+            if (tex == r300->locked_zbuffer->texture) {
+                r300_decompress_zmask_locked(r300);
+                return;
+            }
+        }
+    }
+}
+
+void r300_update_derived_state(struct r300_context* r300)
+{
     if (r300->textures_state.dirty) {
+        r300_decompress_depth_textures(r300);
         r300_merge_textures_and_samplers(r300);
     }
 
index a5fbe85..ca27628 100644 (file)
@@ -743,8 +743,6 @@ static void r300_texture_destroy(struct pipe_screen *screen,
     for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) {
         if (tex->hiz_mem[i])
             u_mmFreeMem(tex->hiz_mem[i]);
-        if (tex->zmask_mem[i])
-            u_mmFreeMem(tex->zmask_mem[i]);
     }
 
     FREE(tex);