[intel-GEM] Add tiling support to swrast.
authorKeith Packard <keithp@keithp.com>
Tue, 6 May 2008 17:51:08 +0000 (10:51 -0700)
committerKeith Packard <keithp@keithp.com>
Tue, 6 May 2008 17:51:08 +0000 (10:51 -0700)
Accessing tiled surfaces without using the fence registers requires that
software deal with the address swizzling itself.

src/mesa/drivers/dri/intel/intel_context.c
src/mesa/drivers/dri/intel/intel_fbo.c
src/mesa/drivers/dri/intel/intel_fbo.h
src/mesa/drivers/dri/intel/intel_screen.c
src/mesa/drivers/dri/intel/intel_screen.h
src/mesa/drivers/dri/intel/intel_span.c
src/mesa/drivers/dri/intel/intel_span.h

index 4a1e1a9..d258e66 100644 (file)
@@ -498,6 +498,9 @@ intel_init_bufmgr(struct intel_context *intel)
                                           intel);
    }
 
+   /* XXX bufmgr should be per-screen, not per-context */
+   intelScreen->ttm = intel->ttm;
+
    return GL_TRUE;
 }
 
index b3f6610..bc0b579 100644 (file)
@@ -295,7 +295,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       rb->Height = height;
 
       /* This sets the Get/PutRow/Value functions */
-      intel_set_span_functions(&irb->Base);
+      /* XXX can we choose a different tile here? */
+      intel_set_span_functions(&irb->Base, INTEL_TILE_NONE);
 
       return GL_TRUE;
    }
@@ -375,7 +376,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb,
  * not a user-created renderbuffer.
  */
 struct intel_renderbuffer *
-intel_create_renderbuffer(GLenum intFormat)
+intel_create_renderbuffer(GLenum intFormat, int tiling)
 {
    GET_CURRENT_CONTEXT(ctx);
 
@@ -442,12 +443,14 @@ intel_create_renderbuffer(GLenum intFormat)
 
    irb->Base.InternalFormat = intFormat;
 
+   irb->tiling = tiling;
+
    /* intel-specific methods */
    irb->Base.Delete = intel_delete_renderbuffer;
    irb->Base.AllocStorage = intel_alloc_window_storage;
    irb->Base.GetPointer = intel_get_pointer;
    /* This sets the Get/PutRow/Value functions */
-   intel_set_span_functions(&irb->Base);
+   intel_set_span_functions(&irb->Base, tiling);
 
    return irb;
 }
@@ -519,7 +522,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx,
 
 static GLboolean
 intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, 
-                          struct gl_texture_image *texImage)
+                    struct gl_texture_image *texImage)
 {
    if (texImage->TexFormat == &_mesa_texformat_argb8888) {
       irb->Base._ActualFormat = GL_RGBA8;
@@ -558,7 +561,7 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb,
 
    irb->Base.Delete = intel_delete_renderbuffer;
    irb->Base.AllocStorage = intel_nop_alloc_storage;
-   intel_set_span_functions(&irb->Base);
+   intel_set_span_functions(&irb->Base, irb->tiling);
 
    irb->RenderToTexture = GL_TRUE;
 
@@ -586,6 +589,9 @@ intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage)
    _mesa_init_renderbuffer(&irb->Base, name);
    irb->Base.ClassID = INTEL_RB_CLASS;
 
+   /* XXX can we fix this? */
+   irb->tiling = INTEL_TILE_NONE;
+
    if (!intel_update_wrapper(ctx, irb, texImage)) {
       _mesa_free(irb);
       return NULL;
index c90c84b..9e085a1 100644 (file)
@@ -72,6 +72,7 @@ struct intel_renderbuffer
    struct intel_region *region;
    void *pfMap;                 /* possibly paged flipped map pointer */
    GLuint pfPitch;              /* possibly paged flipped pitch */
+   int tiling;
    GLboolean RenderToTexture;   /* RTT? */
 
    GLuint PairedDepth;   /**< only used if this is a depth renderbuffer */
@@ -90,7 +91,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *irb,
                              struct intel_region *region);
 
 extern struct intel_renderbuffer *
-intel_create_renderbuffer(GLenum intFormat);
+intel_create_renderbuffer(GLenum intFormat, int tiling);
 
 extern void intel_fbo_init(struct intel_context *intel);
 
index 356e50e..a243324 100644 (file)
@@ -524,20 +524,23 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
 
       /* setup the hardware-based renderbuffers */
       {
-         intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat);
+         intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat, 
+                                                          screen->ttm ? screen->front.tiled : INTEL_TILE_NONE);
          _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT,
                                &intel_fb->color_rb[0]->Base);
       }
 
       if (mesaVis->doubleBufferMode) {
-         intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat);
+         intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat,
+                                                          screen->ttm ? screen->back.tiled : INTEL_TILE_NONE);
          _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT,
                                &intel_fb->color_rb[1]->Base);
 
         if (screen->third.handle) {
            struct gl_renderbuffer *tmp_rb = NULL;
 
-           intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat);
+           intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat,
+                                                             screen->ttm ? screen->third.tiled : INTEL_TILE_NONE);
            _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base);
         }
       }
@@ -546,7 +549,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
         if (mesaVis->stencilBits == 8) {
            /* combined depth/stencil buffer */
            struct intel_renderbuffer *depthStencilRb
-              = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT);
+              = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT,
+                                          screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE);
            /* note: bind RB to two attachment points */
            _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH,
                                   &depthStencilRb->Base);
@@ -554,7 +558,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
                                   &depthStencilRb->Base);
         } else {
            struct intel_renderbuffer *depthRb
-              = intel_create_renderbuffer(GL_DEPTH_COMPONENT24);
+              = intel_create_renderbuffer(GL_DEPTH_COMPONENT24,
+                                          screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE);
            _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH,
                                   &depthRb->Base);
         }
@@ -562,7 +567,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
       else if (mesaVis->depthBits == 16) {
          /* just 16-bit depth buffer, no hw stencil */
          struct intel_renderbuffer *depthRb
-            = intel_create_renderbuffer(GL_DEPTH_COMPONENT16);
+            = intel_create_renderbuffer(GL_DEPTH_COMPONENT16,
+                                       screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE);
          _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base);
       }
 
index e62b2d7..9a73b13 100644 (file)
@@ -74,6 +74,8 @@ typedef struct
    int irq_active;
    int allow_batchbuffer;
 
+   int ttm;
+
    /**
    * Configuration cache with default values for all contexts
    */
index df4f592..149b581 100644 (file)
 #include "swrast/swrast.h"
 
 /*
+ * Deal with tiled surfaces
+ */
+
+#if 0
+/* These are pre-965 tile swizzling functions -- power of two widths.
+ *
+ * x_tile_swizzle_pow2 rearranges the bits of a linear address 'addr' into
+ * an X-tiled address and returns the result.  It is compiled out by the
+ * surrounding #if 0 and is kept for reference only.
+ *
+ * addr: linear address (or offset) to convert.
+ * n:    controls where the preserved "base" bits start (bit n + 4);
+ *       presumably log2 of the surface pitch -- TODO confirm.
+ *
+ * Bits selected by base_mask (bit n + 4 and above, plus the low 8 bits)
+ * are kept in place; the remaining bits are filled from shifted copies
+ * of the address.  Each call also prints a breakdown of the address
+ * through _mesa_printf.
+ *
+ * NOTE(review): the format string uses %x / %08x for uintptr_t values,
+ * which does not match on platforms where uintptr_t is wider than
+ * unsigned int; fix before re-enabling this code.
+ */
+static uintptr_t x_tile_swizzle_pow2 (uintptr_t addr, int n)
+{
+       uintptr_t       a = addr;
+       uintptr_t       base_mask = (((~0) << (n + 4)) | 0xff);
+       uintptr_t       x_mask = ((~0) << 12) & ~base_mask;
+
+       a = ((a & base_mask) | 
+            ((a >> (n-8)) & 0x7) |
+            ((a << 3) & x_mask));
+       _mesa_printf ("x_swizzle %08x (base %x yrow %x tile#x %x xsword %x byte %x) %08x\n",
+                     addr,
+                     addr >> (n + 4),
+                     (addr >> (n + 1)) & 0x7,
+                     (addr >> 9) & ((1 << (n-8)) - 1),
+                     (addr >> 5) & 0xf,
+                     (addr & 0x1f),
+                     a);
+       return a;
+}
+/* y_tile_swizzle_pow2 rearranges the bits of a linear address 'addr' into
+ * a Y-tiled address and returns the result.  Like its X-tile sibling it
+ * sits inside the #if 0 region and is not compiled.
+ *
+ * addr: linear address (or offset) to convert.
+ * n:    controls where the preserved "base" bits start (bit n + 6);
+ *       presumably log2 of the surface pitch -- TODO confirm.
+ *
+ * Bits selected by base_mask (bit n + 6 and above, plus the low 4 bits)
+ * are kept in place; the remaining bits are filled from shifted copies
+ * of the address.  Each call also prints a breakdown of the address
+ * through _mesa_printf.
+ *
+ * NOTE(review): the format string uses %x / %08x for uintptr_t values,
+ * which does not match on platforms where uintptr_t is wider than
+ * unsigned int; fix before re-enabling this code.
+ */
+static uintptr_t y_tile_swizzle_pow2 (uintptr_t addr, int n)
+{
+       uintptr_t       a = (uintptr_t) addr;
+       uintptr_t       base_mask = (((~0) << (n + 6)) | 0xf);
+       uintptr_t       x_mask = ((~0) << 9) & ~base_mask;
+
+       a = ((a & base_mask) | 
+            ((a >> (n-3)) & 0x1f) |
+            ((a << 5) & x_mask));
+       _mesa_printf ("y_swizzle %08x (base %x yrow %x tile#x %x xoword %x byte %x) %08x\n",
+                     addr,
+                     addr >> (n + 6),
+                     (addr >> (n + 1)) & 0x01f,
+                     (addr >> 7) & ((1 << (n-6)) - 1),
+                     (addr >> 4) & 0x7,
+                     (addr & 0xf),
+                     a);
+       return a;
+}
+#endif
+
+/*
+ * Locate pixel (x, y) inside an X-tiled renderbuffer mapping.
+ *
+ * The coordinates are offset by intel->drawX/drawY before use.  The
+ * arithmetic treats an X tile as 512 bytes wide by 8 rows (4096 bytes),
+ * with tiles laid out left to right and then in rows of tiles.
+ * Returns a pointer into irb->pfMap.
+ */
+static GLubyte *x_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel,
+                              int x, int y)
+{
+       GLubyte *map = (GLubyte *) irb->pfMap;
+       /* Bytes between successive rows of tiles: 8 scanlines of pitch. */
+       int     row_of_tiles_bytes = (irb->pfPitch * irb->region->cpp) << 3;
+       int     abs_x = x + intel->drawX;
+       int     abs_y = y + intel->drawY;
+       int     byte_x = abs_x * irb->region->cpp;
+       /* Position within the 4KB tile: 8 rows of 512 bytes. */
+       int     within_tile = ((abs_y & 7) << 9) + (byte_x & 0x1ff);
+       /* Start of the tile: 4096 bytes per tile across, then whole tile rows. */
+       int     tile_start = ((byte_x >> 9) << 12) + (abs_y >> 3) * row_of_tiles_bytes;
+
+       return map + tile_start + within_tile;
+}
+
+/*
+ * Return the CPU address of pixel (x, y) in a Y-tiled renderbuffer mapping.
+ *
+ * x and y are offset by intel->drawX/drawY before use.  The arithmetic
+ * treats a Y tile as 128 bytes wide by 32 rows (4096 bytes), stored as
+ * eight 16-byte-wide columns of 32 rows each.  Returns a pointer into
+ * irb->pfMap.
+ */
+static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel,
+                              int x, int y)
+{
+       GLubyte *buf = (GLubyte *) irb->pfMap;
+       int     tile_stride;
+       int     xbyte;
+       int     x_tile_off, y_tile_off;
+       int     x_tile_number, y_tile_number;
+       int     tile_off, tile_base;
+
+       /* Byte distance between successive rows of tiles.  A row of Y tiles
+        * covers 32 scanlines (y_tile_number is y >> 5 below), so the
+        * stride is 32 * pitch-in-bytes -- not 8 as for X tiles. */
+       tile_stride = (irb->pfPitch * irb->region->cpp) << 5;
+
+       x += intel->drawX;
+       y += intel->drawY;
+
+       xbyte = x * irb->region->cpp;
+
+       x_tile_off = xbyte & 0x7f;
+       y_tile_off = y & 0x1f;
+
+       x_tile_number = xbyte >> 7;
+       y_tile_number = y >> 5;
+
+       /* Within a tile: select the 16-byte column (512 bytes each), then
+        * the row inside that column (16 bytes each), then the byte. */
+       tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + (x_tile_off & 0xf);
+       tile_base = (x_tile_number << 12) + y_tile_number * tile_stride;
+
+       return buf + tile_base + tile_off;
+}
+
+/*
   break intelWriteRGBASpan_ARGB8888
 */
 
       + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\
    GLuint p;                                                           \
    assert(irb->pfMap);\
-   (void) p;
+   (void) p; (void) buf;
 
 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
  * the cliprect info from the context, not the driDrawable.
         int miny = intel->pClipRects[_nc].y1 - intel->drawY;           \
         int maxx = intel->pClipRects[_nc].x2 - intel->drawX;           \
         int maxy = intel->pClipRects[_nc].y2 - intel->drawY;
-
-
-
+       
+#if 0
+      }}
+#endif
 
 #define Y_FLIP(_y) ((_y) * yScale + yBias)
 
+/* XXX with GEM, these need to tell the kernel */
 #define HW_LOCK()
 
 #define HW_UNLOCK()
 #define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4)
 #include "spantmp2.h"
 
+/* 16 bit RGB565 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    intel_XTile_##x##_RGB565
+#define TAG2(x,y) intel_XTile_##x##_RGB565##y
+#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    intel_YTile_##x##_RGB565
+#define TAG2(x,y) intel_YTile_##x##_RGB565##y
+#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y)
+#include "spantmp2.h"
+
+/* 32 bit ARGB8888 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel_XTile_##x##_ARGB8888
+#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y
+#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel_YTile_##x##_ARGB8888
+#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y
+#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y)
+#include "spantmp2.h"
 
 #define LOCAL_DEPTH_VARS                                               \
    struct intel_context *intel = intel_context(ctx);                   \
    const GLint yScale = irb->RenderToTexture ? 1 : -1;                 \
    const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1;        \
    char *buf = (char *) irb->pfMap/*XXX use region->map*/ +             \
-      (intel->drawY * pitch + intel->drawX) * irb->region->cpp;
+      (intel->drawY * pitch + intel->drawX) * irb->region->cpp; (void) buf;
 
 
 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
 
 
 /**
+ ** 16-bit x tile depthbuffer functions.
+ **/
+#define WRITE_DEPTH( _x, _y, d ) \
+   (*((GLushort *)x_tile_swizzle (irb, intel, _x, _y)) = d)
+
+#define READ_DEPTH( d, _x, _y )        \
+   d = *((GLushort *)x_tile_swizzle (irb, intel, _x, _y))
+
+
+#define TAG(x) intel_XTile_##x##_z16
+#include "depthtmp.h"
+
+/**
+ ** 16-bit y tile depthbuffer functions.
+ **/
+#define WRITE_DEPTH( _x, _y, d ) \
+   (*((GLushort *)y_tile_swizzle (irb, intel, _x, _y)) = d)
+
+#define READ_DEPTH( d, _x, _y )        \
+   (d = *((GLushort *)y_tile_swizzle (irb, intel, _x, _y)))
+
+
+#define TAG(x) intel_YTile_##x##_z16
+#include "depthtmp.h"
+
+
+/**
  ** 24/8-bit interleaved depth/stencil functions
  ** Note: we're actually reading back combined depth+stencil values.
  ** The wrappers in main/depthstencil.c are used to extract the depth
 
 
 /**
+ ** 24/8-bit x-tile interleaved depth/stencil functions
+ ** Note: we're actually reading back combined depth+stencil values.
+ ** The wrappers in main/depthstencil.c are used to extract the depth
+ ** and stencil values.
+ **/
+/* Change ZZZS -> SZZZ */
+#define WRITE_DEPTH( _x, _y, d ) {                             \
+   GLuint tmp = ((d) >> 8) | ((d) << 24);                      \
+   *((GLuint *)x_tile_swizzle (irb, intel, _x, _y)) = tmp;                     \
+}
+
+/* Change SZZZ -> ZZZS */
+#define READ_DEPTH( d, _x, _y ) {                              \
+   GLuint tmp = *((GLuint *)x_tile_swizzle (irb, intel, _x, _y));              \
+   d = (tmp << 8) | (tmp >> 24);                               \
+}
+
+#define TAG(x) intel_XTile_##x##_z24_s8
+#include "depthtmp.h"
+
+/**
+ ** 24/8-bit y-tile interleaved depth/stencil functions
+ ** Note: we're actually reading back combined depth+stencil values.
+ ** The wrappers in main/depthstencil.c are used to extract the depth
+ ** and stencil values.
+ **/
+/* Change ZZZS -> SZZZ */
+#define WRITE_DEPTH( _x, _y, d ) {                             \
+   GLuint tmp = ((d) >> 8) | ((d) << 24);                      \
+   *((GLuint *)y_tile_swizzle (irb, intel, _x, _y)) = tmp;                     \
+}
+
+/* Change SZZZ -> ZZZS */
+#define READ_DEPTH( d, _x, _y ) {                              \
+   GLuint tmp = *((GLuint *)y_tile_swizzle (irb, intel, _x, _y));              \
+   d = (tmp << 8) | (tmp >> 24);                               \
+}
+
+#define TAG(x) intel_YTile_##x##_z24_s8
+#include "depthtmp.h"
+
+
+/**
  ** 8-bit stencil function (XXX FBO: This is obsolete)
  **/
 #define WRITE_STENCIL( _x, _y, d ) {                           \
 #define TAG(x) intel##x##_z24_s8
 #include "stenciltmp.h"
 
+/**
+ ** 8-bit x-tile stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL( _x, _y, d ) {                           \
+   GLuint *a = (GLuint *) x_tile_swizzle (irb, intel, _x, _y);  \
+   GLuint tmp = *a;                                            \
+   tmp &= 0xffffff;                                            \
+   tmp |= ((d) << 24);                                         \
+   *a = tmp;                                                   \
+}
+
+#define READ_STENCIL( d, _x, _y )                              \
+   (d = *((GLuint*) x_tile_swizzle (irb, intel, _x, _y)) >> 24)
+
+#define TAG(x) intel_XTile_##x##_z24_s8
+#include "stenciltmp.h"
+
+/**
+ ** 8-bit y-tile stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL( _x, _y, d ) {                           \
+   GLuint *a = (GLuint *) y_tile_swizzle (irb, intel, _x, _y);  \
+   GLuint tmp = *a;                                            \
+   tmp &= 0xffffff;                                            \
+   tmp |= ((d) << 24);                                         \
+   *a = tmp;                                                   \
+}
+
+#define READ_STENCIL( d, _x, _y )                              \
+   (d = *((GLuint*) y_tile_swizzle (irb, intel, _x, _y)) >> 24)
+
+#define TAG(x) intel_YTile_##x##_z24_s8
+#include "stenciltmp.h"
+
 
 
 /**
@@ -379,25 +627,80 @@ intelInitSpanFuncs(GLcontext * ctx)
  * These are used for the software fallbacks.
  */
 void
-intel_set_span_functions(struct gl_renderbuffer *rb)
+intel_set_span_functions(struct gl_renderbuffer *rb, int tiling)
 {
    if (rb->_ActualFormat == GL_RGB5) {
       /* 565 RGB */
-      intelInitPointers_RGB565(rb);
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+        intelInitPointers_RGB565(rb);
+        break;
+      case INTEL_TILE_X:
+        intel_XTile_InitPointers_RGB565(rb);
+        break;
+      case INTEL_TILE_Y:
+        intel_YTile_InitPointers_RGB565(rb);
+        break;
+      }
    }
    else if (rb->_ActualFormat == GL_RGBA8) {
       /* 8888 RGBA */
-      intelInitPointers_ARGB8888(rb);
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+        intelInitPointers_ARGB8888(rb);
+        break;
+      case INTEL_TILE_X:
+        intel_XTile_InitPointers_ARGB8888(rb);
+        break;
+      case INTEL_TILE_Y:
+        intel_YTile_InitPointers_ARGB8888(rb);
+        break;
+      }
    }
    else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) {
-      intelInitDepthPointers_z16(rb);
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+        intelInitDepthPointers_z16(rb);
+        break;
+      case INTEL_TILE_X:
+        intel_XTile_InitDepthPointers_z16(rb);
+        break;
+      case INTEL_TILE_Y:
+        intel_YTile_InitDepthPointers_z16(rb);
+        break;
+      }
    }
    else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 ||        /* XXX FBO remove */
             rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
-      intelInitDepthPointers_z24_s8(rb);
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+        intelInitDepthPointers_z24_s8(rb);
+        break;
+      case INTEL_TILE_X:
+        intel_XTile_InitDepthPointers_z24_s8(rb);
+        break;
+      case INTEL_TILE_Y:
+        intel_YTile_InitDepthPointers_z24_s8(rb);
+        break;
+      }
    }
-   else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) {       /* XXX FBO remove */
-      intelInitStencilPointers_z24_s8(rb);
+   else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) {
+      switch (tiling) {
+      case INTEL_TILE_NONE:
+      default:
+        intelInitStencilPointers_z24_s8(rb);
+        break;
+      case INTEL_TILE_X:
+        intel_XTile_InitStencilPointers_z24_s8(rb);
+        break;
+      case INTEL_TILE_Y:
+        intel_YTile_InitStencilPointers_z24_s8(rb);
+        break;
+      }
    }
    else {
       _mesa_problem(NULL,
index 5201f6d..c56e5e1 100644 (file)
@@ -33,6 +33,10 @@ extern void intelInitSpanFuncs(GLcontext * ctx);
 extern void intelSpanRenderFinish(GLcontext * ctx);
 extern void intelSpanRenderStart(GLcontext * ctx);
 
-extern void intel_set_span_functions(struct gl_renderbuffer *rb);
+extern void intel_set_span_functions(struct gl_renderbuffer *rb, int tiling);
+
+#define INTEL_TILE_NONE        0
+#define INTEL_TILE_X   1
+#define INTEL_TILE_Y   2
 
 #endif