From 87a326a38589e1c919af5f86a59cd571ff0aa831 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 24 Oct 2010 16:36:12 +0200 Subject: [PATCH] drm/nv20: Add Z compression support. Signed-off-by: Francisco Jerez Tested-by: Xavier Chantry Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drv.h | 3 ++ drivers/gpu/drm/nouveau/nouveau_reg.h | 6 +++ drivers/gpu/drm/nouveau/nv10_fb.c | 90 +++++++++++++++++++++++++++++++++-- drivers/gpu/drm/nouveau/nv20_graph.c | 21 ++++---- 4 files changed, 106 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 8b524d8..a94430b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -70,6 +70,8 @@ struct nouveau_tile_reg { uint32_t addr; uint32_t limit; uint32_t pitch; + uint32_t zcomp; + struct drm_mm_node *tag_mem; struct nouveau_fence *fence; }; @@ -306,6 +308,7 @@ struct nouveau_timer_engine { struct nouveau_fb_engine { int num_tiles; + struct drm_mm tag_heap; int (*init)(struct drm_device *dev); void (*takedown)(struct drm_device *dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h index b6384d3..df3a87e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_reg.h +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h @@ -45,6 +45,11 @@ # define NV04_PFB_REF_CMD_REFRESH (1 << 0) #define NV04_PFB_PRE 0x001002d4 # define NV04_PFB_PRE_CMD_PRECHARGE (1 << 0) +#define NV20_PFB_ZCOMP(i) (0x00100300 + 4*(i)) +# define NV20_PFB_ZCOMP_MODE_32 (4 << 24) +# define NV20_PFB_ZCOMP_EN (1 << 31) +# define NV25_PFB_ZCOMP_MODE_16 (1 << 20) +# define NV25_PFB_ZCOMP_MODE_32 (2 << 20) #define NV10_PFB_CLOSE_PAGE2 0x0010033c #define NV04_PFB_SCRAMBLE(i) (0x00100400 + 4 * (i)) #define NV40_PFB_TILE(i) (0x00100600 + (i*16)) @@ -379,6 +384,7 @@ #define NV20_PGRAPH_TLIMIT(i) (0x00400904 + (i*16)) #define NV20_PGRAPH_TSIZE(i) (0x00400908 + (i*16)) #define NV20_PGRAPH_TSTATUS(i) (0x0040090C + (i*16)) +#define NV20_PGRAPH_ZCOMP(i) (0x00400980 + 4*(i)) #define NV10_PGRAPH_TILE(i) (0x00400B00 + (i*16)) #define NV10_PGRAPH_TLIMIT(i) (0x00400B04 + (i*16)) #define NV10_PGRAPH_TSIZE(i) (0x00400B08 + (i*16)) diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c b/drivers/gpu/drm/nouveau/nv10_fb.c index d50acc6..f78181a 100644 --- a/drivers/gpu/drm/nouveau/nv10_fb.c +++ b/drivers/gpu/drm/nouveau/nv10_fb.c @@ -3,21 +3,81 @@ #include "nouveau_drv.h" #include "nouveau_drm.h" +static struct drm_mm_node * +nv20_fb_alloc_tag(struct drm_device *dev, uint32_t size) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + struct drm_mm_node *mem; + int ret; + + ret = drm_mm_pre_get(&pfb->tag_heap); + if (ret) + return NULL; + + spin_lock(&dev_priv->tile.lock); + mem = drm_mm_search_free(&pfb->tag_heap, size, 0, 0); + if (mem) + mem = drm_mm_get_block_atomic(mem, size, 0); + spin_unlock(&dev_priv->tile.lock); + + return mem; +} + +static void +nv20_fb_free_tag(struct drm_device *dev, struct drm_mm_node *mem) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + + spin_lock(&dev_priv->tile.lock); + drm_mm_put_block(mem); + spin_unlock(&dev_priv->tile.lock); +} + void nv10_fb_init_tile_region(struct drm_device *dev, int i, uint32_t addr, uint32_t size, uint32_t pitch, uint32_t flags) { struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i]; + int bpp = (flags & NOUVEAU_GEM_TILE_32BPP ? 32 : 16); tile->addr = addr; tile->limit = max(1u, addr + size) - 1; tile->pitch = pitch; - if (dev_priv->card_type == NV_20) - tile->addr |= 1; - else + if (dev_priv->card_type == NV_20) { + if (flags & NOUVEAU_GEM_TILE_ZETA) { + /* + * Allocate some of the on-die tag memory, + * used to store Z compression meta-data (most + * likely just a bitmap determining if a given + * tile is compressed or not). + */ + tile->tag_mem = nv20_fb_alloc_tag(dev, size / 256); + + if (tile->tag_mem) { + /* Enable Z compression */ + if (dev_priv->chipset >= 0x25) + tile->zcomp = tile->tag_mem->start | + (bpp == 16 ? + NV25_PFB_ZCOMP_MODE_16 : + NV25_PFB_ZCOMP_MODE_32); + else + tile->zcomp = tile->tag_mem->start | + NV20_PFB_ZCOMP_EN | + (bpp == 16 ? 0 : + NV20_PFB_ZCOMP_MODE_32); + } + + tile->addr |= 3; + } else { + tile->addr |= 1; + } + + } else { tile->addr |= 1 << 31; + } } void @@ -26,7 +86,12 @@ nv10_fb_free_tile_region(struct drm_device *dev, int i) struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i]; - tile->addr = tile->limit = tile->pitch = 0; + if (tile->tag_mem) { + nv20_fb_free_tag(dev, tile->tag_mem); + tile->tag_mem = NULL; + } + + tile->addr = tile->limit = tile->pitch = tile->zcomp = 0; } void @@ -38,6 +103,9 @@ nv10_fb_set_tile_region(struct drm_device *dev, int i) nv_wr32(dev, NV10_PFB_TLIMIT(i), tile->limit); nv_wr32(dev, NV10_PFB_TSIZE(i), tile->pitch); nv_wr32(dev, NV10_PFB_TILE(i), tile->addr); + + if (dev_priv->card_type == NV_20) + nv_wr32(dev, NV20_PFB_ZCOMP(i), tile->zcomp); } int @@ -49,6 +117,11 @@ nv10_fb_init(struct drm_device *dev) pfb->num_tiles = NV10_PFB_TILE__SIZE; + if (dev_priv->card_type == NV_20) + drm_mm_init(&pfb->tag_heap, 0, + (dev_priv->chipset >= 0x25 ? + 64 * 1024 : 32 * 1024)); + /* Turn all the tiling regions off. */ for (i = 0; i < pfb->num_tiles; i++) pfb->set_tile_region(dev, i); @@ -59,4 +132,13 @@ nv10_fb_init(struct drm_device *dev) void nv10_fb_takedown(struct drm_device *dev) { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + int i; + + for (i = 0; i < pfb->num_tiles; i++) + pfb->free_tile_region(dev, i); + + if (dev_priv->card_type == NV_20) + drm_mm_takedown(&pfb->tag_heap); } diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c index a71871b..bd065c2 100644 --- a/drivers/gpu/drm/nouveau/nv20_graph.c +++ b/drivers/gpu/drm/nouveau/nv20_graph.c @@ -526,6 +526,12 @@ nv20_graph_set_tile_region(struct drm_device *dev, int i) nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->pitch); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i); nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->addr); + + if (dev_priv->card_type == NV_20) { + nv_wr32(dev, NV20_PGRAPH_ZCOMP(i), tile->zcomp); + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00ea0090 + 4 * i); + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->zcomp); + } } int @@ -589,16 +595,17 @@ nv20_graph_init(struct drm_device *dev) nv_wr32(dev, 0x40009C , 0x00000040); if (dev_priv->chipset >= 0x25) { - nv_wr32(dev, 0x400890, 0x00080000); + nv_wr32(dev, 0x400890, 0x00a8cfff); nv_wr32(dev, 0x400610, 0x304B1FB6); - nv_wr32(dev, 0x400B80, 0x18B82880); + nv_wr32(dev, 0x400B80, 0x1cbd3883); nv_wr32(dev, 0x400B84, 0x44000000); nv_wr32(dev, 0x400098, 0x40000080); nv_wr32(dev, 0x400B88, 0x000000ff); + } else { - nv_wr32(dev, 0x400880, 0x00080000); /* 0x0008c7df */ + nv_wr32(dev, 0x400880, 0x0008c7df); nv_wr32(dev, 0x400094, 0x00000005); - nv_wr32(dev, 0x400B80, 0x45CAA208); /* 0x45eae20e */ + nv_wr32(dev, 0x400B80, 0x45eae20e); nv_wr32(dev, 0x400B84, 0x24000000); nv_wr32(dev, 0x400098, 0x00000040); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00E00038); @@ -611,12 +618,6 @@ nv20_graph_init(struct drm_device *dev) for (i = 0; i < NV10_PFB_TILE__SIZE; i++) nv20_graph_set_tile_region(dev, i); - for (i = 0; i < 8; i++) { - nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4)); - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4); - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, - nv_rd32(dev, 0x100300 + i * 4)); - } nv_wr32(dev, 0x4009a0, nv_rd32(dev, 0x100324)); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA000C); nv_wr32(dev, NV10_PGRAPH_RDI_DATA, nv_rd32(dev, 0x100324)); -- 2.7.4