nvfx: new 2D: new render temporaries with resources
author Luca Barbieri <luca@luca-barbieri.com>
Tue, 3 Aug 2010 03:47:41 +0000 (05:47 +0200)
committer Luca Barbieri <luca@luca-barbieri.com>
Sat, 21 Aug 2010 18:42:14 +0000 (20:42 +0200)
This patch adds support for creating temporary surfaces to allow
rendering to surfaces that cannot be rendered to directly.
It uses the _second_ version of the render temporary infrastructure.

This is necessary for swizzled 3D textures and small mipmaps of
swizzled 2D textures.

This version of the patch creates a resource to use as a temporary
instead of a raw BO, making the code simpler.

src/gallium/drivers/nvfx/nvfx_context.c
src/gallium/drivers/nvfx/nvfx_context.h
src/gallium/drivers/nvfx/nvfx_fragtex.c
src/gallium/drivers/nvfx/nvfx_miptree.c
src/gallium/drivers/nvfx/nvfx_resource.h
src/gallium/drivers/nvfx/nvfx_state_emit.c
src/gallium/drivers/nvfx/nvfx_state_fb.c
src/gallium/drivers/nvfx/nvfx_surface.c

index 3d45f5f..7ab81de 100644 (file)
@@ -15,6 +15,7 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags,
        struct nouveau_channel *chan = screen->base.channel;
        struct nouveau_grobj *eng3d = screen->eng3d;
 
+       /* XXX: we need to actually be intelligent here */
        if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
                BEGIN_RING(chan, eng3d, 0x1fd8, 1);
                OUT_RING  (chan, 2);
@@ -87,5 +88,8 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
        /* set these to that we init them on first validation */
        nvfx->state.scissor_enabled = ~0;
        nvfx->state.stipple_enabled = ~0;
+
+       LIST_INITHEAD(&nvfx->render_cache);
+
        return &nvfx->pipe;
 }
index 278be94..a6ea913 100644 (file)
@@ -11,6 +11,7 @@
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_inlines.h"
+#include "util/u_double_list.h"
 
 #include "draw/draw_vertex.h"
 #include "util/u_blitter.h"
@@ -67,6 +68,7 @@ struct nvfx_state {
        unsigned scissor_enabled;
        unsigned stipple_enabled;
        unsigned fp_samplers;
+       unsigned render_temps;
 };
 
 struct nvfx_vtxelt_state {
@@ -90,6 +92,7 @@ struct nvfx_context {
 
        struct draw_context *draw;
        struct blitter_context* blitter;
+       struct list_head render_cache;
 
        /* HW state derived from pipe states */
        struct nvfx_state state;
@@ -185,7 +188,8 @@ extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe,
 extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx);
 
 /* nvfx_fb.c */
-extern void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx);
+extern int nvfx_framebuffer_prepare(struct nvfx_context *nvfx);
+extern void nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result);
 void
 nvfx_framebuffer_relocate(struct nvfx_context *nvfx);
 
index 0b4a434..6605745 100644 (file)
@@ -16,6 +16,10 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
                samplers &= ~(1 << unit);
 
                if(nvfx->fragment_sampler_views[unit] && nvfx->tex_sampler[unit]) {
+                       util_dirty_surfaces_use_for_sampling(&nvfx->pipe,
+                                       &((struct nvfx_miptree*)nvfx->fragment_sampler_views[unit]->texture)->dirty_surfaces,
+                                       nvfx_surface_flush);
+
                        if(!nvfx->is_nv4x)
                                nv30_fragtex_set(nvfx, unit);
                        else
index 7deb9d7..530d705 100644 (file)
@@ -11,6 +11,7 @@
 #include "nvfx_screen.h"
 #include "nvfx_resource.h"
 #include "nvfx_transfer.h"
+#include "nv04_2d.h"
 
 static void
 nvfx_miptree_choose_format(struct nvfx_miptree *mt)
@@ -115,16 +116,23 @@ nvfx_miptree_get_handle(struct pipe_screen *pscreen,
 
 
 static void
+nvfx_miptree_surface_final_destroy(struct pipe_surface* ps)
+{
+       struct nvfx_surface* ns = (struct nvfx_surface*)ps;
+       pipe_resource_reference(&ps->texture, 0);
+       pipe_resource_reference((struct pipe_resource**)&ns->temp, 0);
+       FREE(ps);
+}
+
+static void
 nvfx_miptree_destroy(struct pipe_screen *screen, struct pipe_resource *pt)
 {
        struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
+       util_surfaces_destroy(&mt->surfaces, pt, nvfx_miptree_surface_final_destroy);
        nouveau_screen_bo_release(screen, mt->base.bo);
        FREE(mt);
 }
 
-
-
-
 struct u_resource_vtbl nvfx_miptree_vtbl = 
 {
    nvfx_miptree_get_handle,          /* get_handle */
@@ -152,6 +160,8 @@ nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_reso
 
         mt->base.base = *pt;
         mt->base.vtbl = &nvfx_miptree_vtbl;
+        util_dirty_surfaces_init(&mt->dirty_surfaces);
+
         pipe_reference_init(&mt->base.base.reference, 1);
         mt->base.base.screen = pscreen;
 
@@ -218,29 +228,28 @@ nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
                         unsigned face, unsigned level, unsigned zslice,
                         unsigned flags)
 {
+       struct nvfx_miptree* mt = (struct nvfx_miptree*)pt;
        struct nvfx_surface *ns;
 
-       ns = CALLOC_STRUCT(nvfx_surface);
-       if (!ns)
-               return NULL;
-       pipe_resource_reference(&ns->base.texture, pt);
-       ns->base.format = pt->format;
-       ns->base.width = u_minify(pt->width0, level);
-       ns->base.height = u_minify(pt->height0, level);
-       ns->base.usage = flags;
-       pipe_reference_init(&ns->base.reference, 1);
-       ns->base.face = face;
-       ns->base.level = level;
-       ns->base.zslice = zslice;
-       ns->pitch = nvfx_subresource_pitch(pt, level);
-       ns->base.offset = nvfx_subresource_offset(pt, face, level, zslice);
-
-       return &ns->base;
+       ns = (struct nvfx_surface*)util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pscreen, pt, face, level, zslice, flags);
+       if(ns->base.base.offset == ~0) {
+               util_dirty_surface_init(&ns->base);
+               ns->pitch = nvfx_subresource_pitch(pt, level);
+               ns->base.base.offset = nvfx_subresource_offset(pt, face, level, zslice);
+       }
+
+       return &ns->base.base;
 }
 
 void
 nvfx_miptree_surface_del(struct pipe_surface *ps)
 {
-       pipe_resource_reference(&ps->texture, NULL);
-       FREE(ps);
+       struct nvfx_surface* ns = (struct nvfx_surface*)ps;
+
+       if(!ns->temp)
+       {
+               util_surfaces_detach(&((struct nvfx_miptree*)ps->texture)->surfaces, ps);
+               pipe_resource_reference(&ps->texture, 0);
+               FREE(ps);
+       }
 }
index 42d04eb..be1845d 100644 (file)
@@ -1,13 +1,16 @@
-
 #ifndef NVFX_RESOURCE_H
 #define NVFX_RESOURCE_H
 
 #include "util/u_transfer.h"
 #include "util/u_format.h"
 #include "util/u_math.h"
+#include "util/u_double_list.h"
+#include "util/u_surfaces.h"
+#include "util/u_dirty_surfaces.h"
 #include <nouveau/nouveau_bo.h>
 
 struct pipe_resource;
+struct nv04_region;
 
 
 /* This gets further specialized into either buffer or texture
@@ -38,17 +41,34 @@ nvfx_resource_on_gpu(struct pipe_resource* pr)
 
 #define NVFX_MAX_TEXTURE_LEVELS  16
 
+/* We have the following invariants for render temporaries
+ *
+ * 1. Render temporaries are always linear
+ * 2. Render temporaries are always up to date
+ * 3. Currently, render temporaries are destroyed when the resource is used for sampling, but kept for any other use
+ *
+ * Also, we do NOT flush temporaries on any pipe->flush().
+ * This is fine, as long as scanout targets and shared resources never need temps.
+ *
+ * TODO: we may want to also support swizzled temporaries to improve performance in some cases.
+ */
+
 struct nvfx_miptree {
         struct nvfx_resource base;
 
         unsigned linear_pitch; /* for linear textures, 0 for swizzled and compressed textures with level-dependent minimal pitch */
         unsigned face_size; /* 128-byte aligned face/total size */
         unsigned level_offset[NVFX_MAX_TEXTURE_LEVELS];
+
+        struct util_surfaces surfaces;
+        struct util_dirty_surfaces dirty_surfaces;
 };
 
 struct nvfx_surface {
-       struct pipe_surface base;
+       struct util_dirty_surface base;
        unsigned pitch;
+
+       struct nvfx_miptree* temp;
 };
 
 static INLINE 
@@ -65,6 +85,12 @@ nvfx_surface_buffer(struct pipe_surface *surf)
        return mt->bo;
 }
 
+static INLINE struct util_dirty_surfaces*
+nvfx_surface_get_dirty_surfaces(struct pipe_surface* surf)
+{
+       struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture;
+       return &mt->dirty_surfaces;
+}
 
 void
 nvfx_init_resource_functions(struct pipe_context *pipe);
@@ -141,4 +167,10 @@ nvfx_subresource_pitch(struct pipe_resource* pt, unsigned level)
        }
 }
 
+void
+nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf);
+
+void
+nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf);
+
 #endif
index f91ae19..dc70f3d 100644 (file)
@@ -1,15 +1,48 @@
 #include "nvfx_context.h"
 #include "nvfx_state.h"
+#include "nvfx_resource.h"
 #include "draw/draw_context.h"
 
 static boolean
 nvfx_state_validate_common(struct nvfx_context *nvfx)
 {
        struct nouveau_channel* chan = nvfx->screen->base.channel;
-       unsigned dirty = nvfx->dirty;
+       unsigned dirty;
+       int all_swizzled = -1;
+       boolean flush_tex_cache = FALSE;
 
        if(nvfx != nvfx->screen->cur_ctx)
-               dirty = ~0;
+       {
+               nvfx->dirty = ~0;
+               nvfx->screen->cur_ctx = nvfx;
+       }
+
+       /* These can trigger use of the 3D engine to copy temporaries.
+        * That will recurse here and thus dirty all 3D state, so we need to do this before anything else, and in a loop.
+        * This converges to having clean temps, then binding both fragtexes and framebuffers.
+        */
+       while(nvfx->dirty & (NVFX_NEW_FB | NVFX_NEW_SAMPLER))
+       {
+               if(nvfx->dirty & NVFX_NEW_SAMPLER)
+               {
+                       nvfx->dirty &=~ NVFX_NEW_SAMPLER;
+                       nvfx_fragtex_validate(nvfx);
+
+                       // TODO: only set this if really necessary
+                       flush_tex_cache = TRUE;
+               }
+
+               if(nvfx->dirty & NVFX_NEW_FB)
+               {
+                       nvfx->dirty &=~ NVFX_NEW_FB;
+                       all_swizzled = nvfx_framebuffer_prepare(nvfx);
+
+                       // TODO: make sure this doesn't happen, i.e. fbs have matching formats
+                       assert(all_swizzled >= 0);
+               }
+       }
+
+       dirty = nvfx->dirty;
 
        if(nvfx->render_mode == HW)
        {
@@ -35,9 +68,6 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
                        nvfx_vtxfmt_validate(nvfx);
        }
 
-       if(dirty & NVFX_NEW_FB)
-               nvfx_state_framebuffer_validate(nvfx);
-
        if(dirty & NVFX_NEW_RAST)
                sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len);
 
@@ -48,10 +78,14 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
                nvfx_state_stipple_validate(nvfx);
 
        if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST))
+       {
                nvfx_fragprog_validate(nvfx);
+               if(dirty & NVFX_NEW_FRAGPROG)
+                       flush_tex_cache = TRUE; // TODO: do we need this?
+       }
 
-       if(dirty & NVFX_NEW_SAMPLER)
-               nvfx_fragtex_validate(nvfx);
+       if(all_swizzled >= 0)
+               nvfx_framebuffer_validate(nvfx, all_swizzled);
 
        if(dirty & NVFX_NEW_BLEND)
                sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len);
@@ -72,13 +106,17 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
        if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_FB))
                nvfx_state_viewport_validate(nvfx);
 
-       /* TODO: could nv30 need this or something similar too? */
-       if((dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SAMPLER)) && nvfx->is_nv4x) {
-               WAIT_RING(chan, 4);
-               OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
-               OUT_RING(chan, 2);
-               OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
-               OUT_RING(chan, 1);
+       if(flush_tex_cache)
+       {
+               // TODO: what about nv30?
+               if(nvfx->is_nv4x)
+               {
+                       WAIT_RING(chan, 4);
+                       OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
+                       OUT_RING(chan, 2);
+                       OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1));
+                       OUT_RING(chan, 1);
+               }
        }
        nvfx->dirty = 0;
        return TRUE;
@@ -99,6 +137,21 @@ nvfx_state_emit(struct nvfx_context *nvfx)
              ;
        MARK_RING(chan, max_relocs * 2, max_relocs * 2);
        nvfx_state_relocate(nvfx);
+
+       unsigned render_temps = nvfx->state.render_temps;
+       if(render_temps)
+       {
+               for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+               {
+                       if(render_temps & (1 << i))
+                               util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
+                                               (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
+               }
+
+               if(render_temps & 0x80)
+                       util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
+                                       (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
+       }
 }
 
 void
index e111d11..80b0f21 100644 (file)
@@ -1,19 +1,56 @@
 #include "nvfx_context.h"
 #include "nvfx_resource.h"
 #include "nouveau/nouveau_util.h"
+#include "util/u_format.h"
 
-void
-nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
+static inline boolean
+nvfx_surface_linear_renderable(struct pipe_surface* surf)
+{
+       return (surf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)
+               && !(surf->offset & 63)
+               && !(((struct nvfx_surface*)surf)->pitch & 63);
+}
+
+static inline boolean
+nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_surface* surf)
+{
+       /* TODO: return FALSE if we have a format not supporting swizzled rendering (e.g. r8); currently those are not supported at all */
+       return !((struct nvfx_miptree*)surf->texture)->linear_pitch
+               && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->level) <= 1)
+               && !(surf->offset & 127)
+               && (surf->width == fb->width)
+               && (surf->height == fb->height)
+               && !((struct nvfx_surface*)surf)->temp;
+}
+
+static boolean
+nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, struct nvfx_render_target* target)
+{
+       struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+       if(!ns->temp)
+       {
+               target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo;
+               target->offset = surf->offset;
+               target->pitch = align(ns->pitch, 64);
+               assert(target->pitch);
+               return FALSE;
+       }
+       else
+       {
+               target->offset = 0;
+               target->pitch = ns->temp->linear_pitch;
+               target->bo = ns->temp->base.bo;
+               assert(target->pitch);
+               return TRUE;
+       }
+}
+
+int
+nvfx_framebuffer_prepare(struct nvfx_context *nvfx)
 {
        struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
-       struct nouveau_channel *chan = nvfx->screen->base.channel;
-       uint32_t rt_enable = 0, rt_format = 0;
-       int i, colour_format = 0, zeta_format = 0;
-       int depth_only = 0;
-       unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
-       unsigned w = fb->width;
-       unsigned h = fb->height;
-       int colour_bits = 32, zeta_bits = 32;
+       int i, color_format = 0, zeta_format = 0;
+       int all_swizzled = 1;
 
        if(!nvfx->is_nv4x)
                assert(fb->nr_cbufs <= 2);
@@ -21,113 +58,135 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
                assert(fb->nr_cbufs <= 4);
 
        for (i = 0; i < fb->nr_cbufs; i++) {
-               if (colour_format)
-                       assert(colour_format == fb->cbufs[i]->format);
-               else
-                       colour_format = fb->cbufs[i]->format;
-
-               rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
-               nvfx->hw_rt[i].bo = ((struct nvfx_miptree*)fb->cbufs[i]->texture)->base.bo;
-               nvfx->hw_rt[i].offset = fb->cbufs[i]->offset;
-               nvfx->hw_rt[i].pitch = ((struct nvfx_surface *)fb->cbufs[i])->pitch;
+               if (color_format) {
+                       if(color_format != fb->cbufs[i]->format)
+                               return -1;
+               } else
+                       color_format = fb->cbufs[i]->format;
+
+               if(!nvfx_surface_swizzled_renderable(fb, fb->cbufs[i]))
+                       all_swizzled = 0;
        }
-       for(; i < 4; ++i)
-               nvfx->hw_rt[i].bo = 0;
 
+       if (fb->zsbuf) {
+               /* TODO: return FALSE if we have a format not supporting a depth buffer (e.g. r8); currently those are not supported at all */
+               if(!nvfx_surface_swizzled_renderable(fb, fb->zsbuf))
+                       all_swizzled = 0;
+
+               if(all_swizzled && util_format_get_blocksize(color_format) != util_format_get_blocksize(zeta_format))
+                       all_swizzled = 0;
+       }
+
+       for (i = 0; i < fb->nr_cbufs; i++) {
+               if(!((struct nvfx_surface*)fb->cbufs[i])->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->cbufs[i]))
+                       nvfx_surface_create_temp(&nvfx->pipe, fb->cbufs[i]);
+       }
+
+       if(fb->zsbuf) {
+               if(!((struct nvfx_surface*)fb->zsbuf)->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->zsbuf))
+                       nvfx_surface_create_temp(&nvfx->pipe, fb->zsbuf);
+       }
+
+       return all_swizzled;
+}
+
+void
+nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
+{
+       struct pipe_framebuffer_state *fb = &nvfx->framebuffer;
+       struct nouveau_channel *chan = nvfx->screen->base.channel;
+       uint32_t rt_enable, rt_format;
+       int i;
+       unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+       unsigned w = fb->width;
+       unsigned h = fb->height;
+
+       rt_enable = (NV34TCL_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1;
        if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 |
                         NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3))
                rt_enable |= NV34TCL_RT_ENABLE_MRT;
 
+       nvfx->state.render_temps = 0;
+
+       for (i = 0; i < fb->nr_cbufs; i++)
+               nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i;
+
+       for(; i < 4; ++i)
+               nvfx->hw_rt[i].bo = 0;
+
        if (fb->zsbuf) {
-               zeta_format = fb->zsbuf->format;
-               nvfx->hw_zeta.bo = ((struct nvfx_miptree*)fb->zsbuf->texture)->base.bo;
-               nvfx->hw_zeta.offset = fb->zsbuf->offset;
-               nvfx->hw_zeta.pitch = ((struct nvfx_surface *)fb->zsbuf)->pitch;
-       }
-       else
-               nvfx->hw_zeta.bo = 0;
-
-       if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 |
-               NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) {
-               /* Render to at least a colour buffer */
-               if (!(fb->cbufs[0]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
-                       assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
-                       for (i = 1; i < fb->nr_cbufs; i++)
-                               assert(!(fb->cbufs[i]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR));
-
-                       rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
-                               (log2i(fb->cbufs[0]->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
-                               (log2i(fb->cbufs[0]->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
-               }
-               else
-                       rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
-       } else if (fb->zsbuf) {
-               depth_only = 1;
-
-               /* Render to depth buffer only */
-               if (!(fb->zsbuf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
-                       assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
-
-                       rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
-                               (log2i(fb->zsbuf->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
-                               (log2i(fb->zsbuf->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
-               }
-               else
-                       rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
-       } else {
-               return;
+               nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7;
+
+               assert(util_format_get_stride(fb->zsbuf->format, fb->width) <= nvfx->hw_zeta.pitch);
+               assert(nvfx->hw_zeta.offset + nvfx->hw_zeta.pitch * fb->height <= nvfx->hw_zeta.bo->size);
        }
 
-       switch (colour_format) {
-       case PIPE_FORMAT_B8G8R8X8_UNORM:
-               rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
-               break;
-       case PIPE_FORMAT_B8G8R8A8_UNORM:
-       case 0:
-               rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
-               break;
-       case PIPE_FORMAT_B5G6R5_UNORM:
+       if (prepare_result) {
+               assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+
+               rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+                       (log2i(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+                       (log2i(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+       } else
+               rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+
+       if(fb->nr_cbufs > 0) {
+               switch (fb->cbufs[0]->format) {
+               case PIPE_FORMAT_B8G8R8X8_UNORM:
+                       rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
+                       break;
+               case PIPE_FORMAT_B8G8R8A8_UNORM:
+               case 0:
+                       rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
+                       break;
+               case PIPE_FORMAT_B5G6R5_UNORM:
+                       rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+                       break;
+               default:
+                       assert(0);
+               }
+       } else if(fb->zsbuf && util_format_get_blocksize(fb->zsbuf->format) == 2)
                rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
-               colour_bits = 16;
-               break;
-       default:
-               assert(0);
-       }
+       else
+               rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
 
-       switch (zeta_format) {
-       case PIPE_FORMAT_Z16_UNORM:
+       if(fb->zsbuf) {
+               switch (fb->zsbuf->format) {
+               case PIPE_FORMAT_Z16_UNORM:
+                       rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+                       break;
+               case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+               case PIPE_FORMAT_X8Z24_UNORM:
+               case 0:
+                       rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
+                       break;
+               default:
+                       assert(0);
+               }
+       } else if(fb->nr_cbufs && util_format_get_blocksize(fb->cbufs[0]->format) == 2)
                rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
-               zeta_bits = 16;
-               break;
-       case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
-       case PIPE_FORMAT_X8Z24_UNORM:
-       case 0:
+       else
                rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8;
-               break;
-       default:
-               assert(0);
-       }
 
-       if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) {
-               /* TODO: does this limitation really exist?
-                  TODO: can it be worked around somehow? */
-               assert(0);
-       }
+       if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) || fb->zsbuf) {
+               struct nvfx_render_target *rt0 = &nvfx->hw_rt[0];
+               uint32_t pitch;
+
+               if(!(rt_enable & NV34TCL_RT_ENABLE_COLOR0))
+                       rt0 = &nvfx->hw_zeta;
 
-       if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0)
-               || ((!nvfx->is_nv4x) && depth_only)) {
-               struct nvfx_render_target *rt0 = (depth_only ? &nvfx->hw_zeta : &nvfx->hw_rt[0]);
-               uint32_t pitch = rt0->pitch;
+               pitch = rt0->pitch;
 
                if(!nvfx->is_nv4x)
                {
-                       if (nvfx->hw_zeta.bo) {
+                       if (nvfx->hw_zeta.bo)
                                pitch |= (nvfx->hw_zeta.pitch << 16);
-                       } else {
+                       else
                                pitch |= (pitch << 16);
-                       }
                }
 
+               //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch);
+
                OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1));
                OUT_RELOC(chan, rt0->bo, 0,
                              rt_flags | NOUVEAU_BO_OR,
@@ -180,7 +239,7 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
                }
        }
 
-       if (zeta_format) {
+       if (fb->zsbuf) {
                OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1));
                OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,
                              rt_flags | NOUVEAU_BO_OR,
index a97f342..8208c67 100644 (file)
@@ -94,23 +94,44 @@ nvfx_region_fixup_swizzled(struct nv04_region* rgn, unsigned zslice, unsigned wi
 }
 
 static INLINE void
-nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y)
+nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write)
 {
-       rgn->bo = ((struct nvfx_resource*)surf->base.texture)->bo;
-       rgn->offset = surf->base.offset;
-       rgn->pitch = surf->pitch;
        rgn->x = x;
        rgn->y = y;
        rgn->z = 0;
+       nvfx_region_set_format(rgn, surf->base.base.format);
 
-       nvfx_region_set_format(rgn, surf->base.format);
-       if(!(surf->base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR))
-               nvfx_region_fixup_swizzled(rgn, surf->base.zslice, surf->base.width, surf->base.height, u_minify(surf->base.texture->depth0, surf->base.level));
+       if(surf->temp)
+       {
+               rgn->bo = surf->temp->base.bo;
+               rgn->offset = 0;
+               rgn->pitch = surf->temp->linear_pitch;
+
+               if(for_write)
+                       util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base);
+       } else {
+               rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo;
+               rgn->offset = surf->base.base.offset;
+               rgn->pitch = surf->pitch;
+
+               if(!(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR))
+                       nvfx_region_fixup_swizzled(rgn, surf->base.base.zslice, surf->base.base.width, surf->base.base.height, u_minify(surf->base.base.texture->depth0, surf->base.base.level));
+       }
 }
 
 static INLINE void
-nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z)
+nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write)
 {
+       if(pt->target != PIPE_BUFFER)
+       {
+               struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z);
+               if(ns && util_dirty_surface_is_dirty(&ns->base))
+               {
+                       nvfx_region_init_for_surface(rgn, ns, x, y, for_write);
+                       return;
+               }
+       }
+
        rgn->bo = ((struct nvfx_resource*)pt)->bo;
        rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z);
        rgn->pitch = nvfx_subresource_pitch(pt, sub.level);
@@ -165,6 +186,7 @@ nv04_scaled_image_format(enum pipe_format format)
        }
 }
 
+// XXX: must save index buffer too!
 static struct blitter_context*
 nvfx_get_blitter(struct pipe_context* pipe, int copy)
 {
@@ -237,8 +259,8 @@ nvfx_resource_copy_region(struct pipe_context *pipe,
        int dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
        int src_on_gpu = nvfx_resource_on_gpu(srcr);
 
-       nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz);
-       nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz);
+       nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE);
+       nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE);
        w = util_format_get_stride(dstr->format, w) >> dst.bpps;
        h = util_format_get_nblocksy(dstr->format, h);
 
@@ -293,10 +315,11 @@ nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dsts,
        struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
        struct nv04_region dst;
        /* Always try to use the GPU right now, if possible
-        * If the user wanted the surface data on the CPU, he would have cleared with memset */
+        * If the user wanted the surface data on the CPU, he would have cleared with memset (hopefully) */
 
        // we don't care about interior pixel order since we set all them to the same value
-       nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy);
+       nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy, TRUE);
+
        w = util_format_get_stride(dsts->format, w) >> dst.bpps;
        h = util_format_get_nblocksy(dsts->format, h);
 
@@ -342,6 +365,80 @@ nvfx_screen_surface_init(struct pipe_screen *pscreen)
 }
 
 static void
+nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp)
+{
+       struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+       struct pipe_subresource tempsr, surfsr;
+       struct pipe_resource *idxbuf_buffer;
+       unsigned idxbuf_format;
+
+       tempsr.face = 0;
+       tempsr.level = 0;
+       surfsr.face = surf->face;
+       surfsr.level = surf->level;
+
+       // TODO: do this properly, in blitter save
+       idxbuf_buffer = ((struct nvfx_context*)pipe)->idxbuf_buffer;
+       idxbuf_format = ((struct nvfx_context*)pipe)->idxbuf_format;
+
+       if(to_temp)
+               nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
+       else
+               nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);
+
+       ((struct nvfx_context*)pipe)->idxbuf_buffer = idxbuf_buffer;
+       ((struct nvfx_context*)pipe)->idxbuf_format = idxbuf_format;
+}
+
+void
+nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf)
+{
+       struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+       struct pipe_resource template;
+       memset(&template, 0, sizeof(struct pipe_resource));
+       template.target = PIPE_TEXTURE_2D;
+       template.format = surf->format;
+       template.width0 = surf->width;
+       template.height0 = surf->height;
+       template.depth0 = 1;
+       template.nr_samples = surf->texture->nr_samples;
+       template.flags = NVFX_RESOURCE_FLAG_LINEAR;
+
+       ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template);
+       nvfx_surface_copy_temp(pipe, surf, 1);
+}
+
+void
+nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf)
+{
+       struct nvfx_context* nvfx = (struct nvfx_context*)pipe;
+       struct nvfx_surface* ns = (struct nvfx_surface*)surf;
+       boolean bound = FALSE;
+
+       /* must be done before the copy, otherwise the copy will use the temp as destination */
+       util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base);
+
+       nvfx_surface_copy_temp(pipe, surf, 0);
+
+       if(nvfx->framebuffer.zsbuf == surf)
+               bound = TRUE;
+       else
+       {
+               for(unsigned i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+               {
+                       if(nvfx->framebuffer.cbufs[i] == surf)
+                       {
+                               bound = TRUE;
+                               break;
+                       }
+               }
+       }
+
+       if(!bound)
+               pipe_resource_reference((struct pipe_resource**)&ns->temp, 0);
+}
+
+static void
 nvfx_clear_render_target(struct pipe_context *pipe,
                         struct pipe_surface *dst,
                         const float *rgba,