From eda2284961e46002c7b2bd4e4ae2785d7b0a2191 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 22 Jan 2009 02:53:30 +1000
Subject: [PATCH] r200r300: start merging span code

---
 src/mesa/drivers/dri/r200/r200_span.c     | 141 +++--------------
 src/mesa/drivers/dri/r200/r200_texstate.c | 253 ------------------------------
 src/mesa/drivers/dri/r300/radeon_span.c   | 217 -------------------------
 src/mesa/drivers/dri/radeon/common_misc.c | 211 +++++++++++++++++++++++++
 src/mesa/drivers/dri/radeon/common_misc.h |   9 +-
 5 files changed, 241 insertions(+), 590 deletions(-)

diff --git a/src/mesa/drivers/dri/r200/r200_span.c b/src/mesa/drivers/dri/r200/r200_span.c
index df0172f..17c6513 100644
--- a/src/mesa/drivers/dri/r200/r200_span.c
+++ b/src/mesa/drivers/dri/r200/r200_span.c
@@ -52,21 +52,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * information.
  */
 #define LOCAL_VARS						\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
+   struct radeon_renderbuffer *rrb = (void *) rb;		\
+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;		\
    const GLuint bottom = dPriv->h - 1;				\
-   GLubyte *buf = (GLubyte *) drb->flippedData			\
-      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
    GLuint p;							\
    (void) p;
 
 #define LOCAL_DEPTH_VARS				\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
-   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
+   struct radeon_renderbuffer *rrb = (void *) rb;		\
+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;	\
    const GLuint bottom = dPriv->h - 1;			\
    GLuint xo = dPriv->x;				\
-   GLuint yo = dPriv->y;				\
-   GLubyte *buf = (GLubyte *) drb->Base.Data;
+   GLuint yo = dPriv->y;
 
 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
 
@@ -89,7 +86,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define TAG(x)    radeon##x##_RGB565
 #define TAG2(x,y) radeon##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
 #include "spantmp2.h"
 
 /* 32 bit, ARGB8888 color spanline and pixel functions
@@ -99,7 +96,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define TAG(x)    radeon##x##_ARGB8888
 #define TAG2(x,y) radeon##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
 #include "spantmp2.h"
 
 
@@ -116,70 +113,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * are set up correctly. It is not quite enough to get it working with hyperz too...
  */
 
-/* extract bit 'b' of x, result is zero or one */
-#define BIT(x,b) ((x & (1<<b))>>b)
-
-static GLuint
-r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y )
-{
-   GLuint pitch = drb->pitch;
-   if (drb->depthHasSurface) {
-      return 4 * (x + y * pitch);
-   }
-   else {
-      GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5);
-      GLuint a = 
-         (BIT(x,0) << 2) |
-         (BIT(y,0) << 3) |
-         (BIT(x,1) << 4) |
-         (BIT(y,1) << 5) |
-         (BIT(x,3) << 6) |
-         (BIT(x,4) << 7) |
-         (BIT(x,2) << 8) |
-         (BIT(y,2) << 9) |
-         (BIT(y,3) << 10) |
-         (((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
-         ((b >> 1) << 12);
-      return a;
-   }
-}
-
-static GLuint
-r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
-{
-   GLuint pitch = drb->pitch;
-   if (drb->depthHasSurface) {
-      return 2 * (x + y * pitch);
-   }
-   else {
-      GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6);
-      GLuint a = 
-         (BIT(x,0) << 1) |
-         (BIT(y,0) << 2) |
-         (BIT(x,1) << 3) |
-         (BIT(y,1) << 4) |
-         (BIT(x,2) << 5) |
-         (BIT(x,4) << 6) |
-         (BIT(x,5) << 7) |
-         (BIT(x,3) << 8) |
-         (BIT(y,2) << 9) |
-         (BIT(y,3) << 10) |
-         (((pitch & 0x40) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
-         ((b >> 1) << 12);
-      return a;
-   }
-}
-
-
 /* 16-bit depth buffer functions
  */
 #define VALUE_TYPE GLushort
 
 #define WRITE_DEPTH( _x, _y, d )					\
-   *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d;
+   *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo) = d
 
 #define READ_DEPTH( d, _x, _y )						\
-   d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo ));
+   d = *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo)
 
 #define TAG(x) radeon##x##_z16
 #include "depthtmp.h"
@@ -191,16 +133,17 @@ r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
 
 #define WRITE_DEPTH( _x, _y, d )					\
 do {									\
-   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);		\
+   GLuint tmp = *_ptr;							\
    tmp &= 0xff000000;							\
    tmp |= ((d) & 0x00ffffff);						\
-   *(GLuint *)(buf + offset) = tmp;					\
+   *_ptr = tmp;					\
 } while (0)
 
 #define READ_DEPTH( d, _x, _y )						\
-   d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo,			\
-					 _y + yo )) & 0x00ffffff;
+   do {									\
+      d = (*(GLuint*)(radeon_ptr32(rrb, _x + xo, _y + yo)) & 0x00ffffff); \
+   }while(0)
 
 #define TAG(x) radeon##x##_z24_s8
 #include "depthtmp.h"
@@ -214,17 +157,17 @@ do {									\
  */
 #define WRITE_STENCIL( _x, _y, d )					\
 do {									\
-   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);		\
+   GLuint tmp = *_ptr;				\
    tmp &= 0x00ffffff;							\
    tmp |= (((d) & 0xff) << 24);						\
-   *(GLuint *)(buf + offset) = tmp;					\
+   *_ptr = tmp;					\
 } while (0)
 
 #define READ_STENCIL( d, _x, _y )					\
 do {									\
-   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);		\
+   GLuint tmp = *_ptr;							\
    tmp &= 0xff000000;							\
    d = tmp >> 24;							\
 } while (0)
@@ -233,51 +176,11 @@ do {									\
 #include "stenciltmp.h"
 
 
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above).  WaitForIdle() is the main
- * culprit.
- */
-
-static void r200SpanRenderStart( GLcontext *ctx )
-{
-   r200ContextPtr rmesa = R200_CONTEXT( ctx );
-
-   R200_FIREVERTICES( rmesa );
-   LOCK_HARDWARE( &rmesa->radeon );
-   radeonWaitForIdleLocked( &rmesa->radeon );
-
-   /* Read & rewrite the first pixel in the frame buffer.  This should
-    * be a noop, right?  In fact without this conform fails as reading
-    * from the framebuffer sometimes produces old results -- the
-    * on-card read cache gets mixed up and doesn't notice that the
-    * framebuffer has been updated.
-    *
-    * In the worst case this is buggy too as p might get the wrong
-    * value first time, so really need a hidden pixel somewhere for this.
-    */
-   {
-      int p;
-      driRenderbuffer *drb =
-	 (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
-      volatile int *buf =
-	 (volatile int *)(rmesa->radeon.dri.screen->pFB + drb->offset);
-      p = *buf;
-      *buf = p;
-   }
-}
-
-static void r200SpanRenderFinish( GLcontext *ctx )
-{
-   r200ContextPtr rmesa = R200_CONTEXT( ctx );
-   _swrast_flush( ctx );
-   UNLOCK_HARDWARE( &rmesa->radeon );
-}
-
 void r200InitSpanFuncs( GLcontext *ctx )
 {
    struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
-   swdd->SpanRenderStart          = r200SpanRenderStart;
-   swdd->SpanRenderFinish         = r200SpanRenderFinish; 
+   swdd->SpanRenderStart          = radeonSpanRenderStart;  /* common implementation in radeon/common_misc.c */
+   swdd->SpanRenderFinish         = radeonSpanRenderFinish; /* likewise; undoes what SpanRenderStart set up */
 }
 
 
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 7dcb86d..12b8288 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -141,259 +141,6 @@ static const struct tx_table tx_table_le[] =
 #undef _ALPHA
 #undef _INVALID
 
-#if 0
-
-/**
- * This function computes the number of bytes of storage needed for
- * the given texture object (all mipmap levels, all cube faces).
- * The \c image[face][level].x/y/width/height parameters for upload/blitting
- * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
- * too.
- * 
- * \param rmesa Context pointer
- * \param tObj GL texture object whose images are to be posted to
- *                 hardware state.
- */
-static void r200SetTexImages( r200ContextPtr rmesa,
-			      struct gl_texture_object *tObj )
-{
-   radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
-   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   GLint curOffset, blitWidth;
-   GLint i, texelBytes;
-   GLint numLevels;
-   GLint log2Width, log2Height, log2Depth;
-
-   /* Set the hardware texture format
-    */
-   if ( !t->image_override ) {
-      if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
-	 const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
-								tx_table_be;
-
-         t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
-                             R200_TXFORMAT_ALPHA_IN_MAP);
-         t->pp_txfilter &= ~R200_YUV_TO_RGB;
-
-	 t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format;
-	 t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter;
-      }
-      else {
-         _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
-         return;
-      }
-   }
-
-
-
-   /* Compute which mipmap levels we really want to send to the hardware.
-    */
-
-   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
-   log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
-   log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
-   log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
-
-   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
-   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-
-   /* Calculate mipmap offsets and dimensions for blitting (uploading)
-    * The idea is that we lay out the mipmap levels within a block of
-    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
-    */
-   curOffset = 0;
-   blitWidth = BLIT_WIDTH_BYTES;
-   t->tile_bits = 0;
-
-   /* figure out if this texture is suitable for tiling. */
-   if (texelBytes) {
-      if (rmesa->texmicrotile  && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
-      /* texrect might be able to use micro tiling too in theory? */
-	 (baseImage->Height > 1)) {
-	 /* allow 32 (bytes) x 1 mip (which will use two times the space
-	 the non-tiled version would use) max if base texture is large enough */
-	 if ((numLevels == 1) ||
-	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
-	       (baseImage->Width * texelBytes > 64)) ||
-	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
-	    t->tile_bits |= R200_TXO_MICRO_TILE;
-	 }
-      }
-      if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
-	/* we can set macro tiling even for small textures, they will be untiled anyway */
-	 t->tile_bits |= R200_TXO_MACRO_TILE;
-      }
-   }
-
-   for (i = 0; i < numLevels; i++) {
-      const struct gl_texture_image *texImage;
-      GLuint size;
-
-      texImage = tObj->Image[0][i + t->base.firstLevel];
-      if ( !texImage )
-	 break;
-
-      /* find image size in bytes */
-      if (texImage->IsCompressed) {
-      /* need to calculate the size AFTER padding even though the texture is
-         submitted without padding.
-         Only handle pot textures currently - don't know if npot is even possible,
-         size calculation would certainly need (trivial) adjustments.
-         Align (and later pad) to 32byte, not sure what that 64byte blit width is
-         good for? */
-         if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) {
-            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
-            if ((texImage->Width + 3) < 8) /* width one block */
-               size = texImage->CompressedSize * 4;
-            else if ((texImage->Width + 3) < 16)
-               size = texImage->CompressedSize * 2;
-            else size = texImage->CompressedSize;
-         }
-         else /* DXT3/5, 16 bytes per block */
-            if ((texImage->Width + 3) < 8)
-               size = texImage->CompressedSize * 2;
-            else size = texImage->CompressedSize;
-      }
-      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
-      }
-      else if (t->tile_bits & R200_TXO_MICRO_TILE) {
-	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
-	    though the actual offset may be different (if texture is less than
-	    32 bytes width) to the untiled case */
-	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
-	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
-	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
-      }
-      else {
-	 int w = (texImage->Width * texelBytes + 31) & ~31;
-	 size = w * texImage->Height * texImage->Depth;
-	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
-      }
-      assert(size > 0);
-
-      /* Align to 32-byte offset.  It is faster to do this unconditionally
-       * (no branch penalty).
-       */
-
-      curOffset = (curOffset + 0x1f) & ~0x1f;
-
-      if (texelBytes) {
-	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
-	 t->image[0][i].y = 0;
-	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
-	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
-      }
-      else {
-         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
-         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
-         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
-         t->image[0][i].height = size / t->image[0][i].width;     
-      }
-
-#if 0
-      /* for debugging only and only  applicable to non-rectangle targets */
-      assert(size % t->image[0][i].width == 0);
-      assert(t->image[0][i].x == 0
-             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
-#endif
-
-      if (0)
-         fprintf(stderr,
-                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
-                 i, texImage->Width, texImage->Height,
-                 t->image[0][i].x, t->image[0][i].y,
-                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
-
-      curOffset += size;
-
-   }
-
-   /* Align the total size of texture memory block.
-    */
-   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-
-   /* Setup remaining cube face blits, if needed */
-   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      const GLuint faceSize = t->base.totalSize;
-      GLuint face;
-      /* reuse face 0 x/y/width/height - just update the offset when uploading */
-      for (face = 1; face < 6; face++) {
-         for (i = 0; i < numLevels; i++) {
-            t->image[face][i].x =  t->image[0][i].x;
-            t->image[face][i].y =  t->image[0][i].y;
-            t->image[face][i].width  = t->image[0][i].width;
-            t->image[face][i].height = t->image[0][i].height;
-         }
-      }
-      t->base.totalSize = 6 * faceSize; /* total texmem needed */
-   }
-
-
-   /* Hardware state:
-    */
-   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
-   t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
-
-   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
-		       R200_TXFORMAT_HEIGHT_MASK |
-                       R200_TXFORMAT_CUBIC_MAP_ENABLE |
-                       R200_TXFORMAT_F5_WIDTH_MASK |
-                       R200_TXFORMAT_F5_HEIGHT_MASK);
-   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
-		      (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
-
-   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
-   if (tObj->Target == GL_TEXTURE_3D) {
-      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
-      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
-   }
-   else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      ASSERT(log2Width == log2Height);
-      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
-                         (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
-/* don't think we need this bit, if it exists at all - fglrx does not set it */
-                         (R200_TXFORMAT_CUBIC_MAP_ENABLE));
-      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
-      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
-                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
-                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
-                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
-                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
-                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
-                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
-                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
-   }
-   else {
-      /* If we don't in fact send enough texture coordinates, q will be 1,
-       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
-       */
-      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
-   }
-
-   t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
-                   ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
-
-   /* Only need to round to nearest 32 for textures, but the blitter
-    * requires 64-byte aligned pitches, and we may/may not need the
-    * blitter.   NPOT only!
-    */
-   if ( !t->image_override ) {
-      if (baseImage->IsCompressed)
-         t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
-      else
-         t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
-      t->pp_txpitch -= 32;
-   }
-
-   t->dirty_state = R200_TEX_ALL;
-
-   /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
-}
-#endif
-
-
 /* ================================================================
  * Texture combine functions
  */
diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
index 30dde80..5019ee4 100644
--- a/src/mesa/drivers/dri/r300/radeon_span.c
+++ b/src/mesa/drivers/dri/r300/radeon_span.c
@@ -79,133 +79,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define HW_UNLOCK()
 
-static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
-                             GLint x, GLint y)
-{
-    GLubyte *ptr = rrb->bo->ptr;
-    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-    GLint offset;
-    GLint nmacroblkpl;
-    GLint nmicroblkpl;
-
-    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-        offset = x * rrb->cpp + y * rrb->pitch;
-    } else {
-        offset = 0;
-        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-                nmacroblkpl = rrb->pitch >> 5;
-                offset += ((y >> 4) * nmacroblkpl) << 11;
-                offset += ((y & 15) >> 1) << 8;
-                offset += (y & 1) << 4;
-                offset += (x >> 5) << 11;
-                offset += ((x & 31) >> 2) << 5;
-                offset += (x & 3) << 2;
-            } else {
-                nmacroblkpl = rrb->pitch >> 6;
-                offset += ((y >> 3) * nmacroblkpl) << 11;
-                offset += (y & 7) << 8;
-                offset += (x >> 6) << 11;
-                offset += ((x & 63) >> 3) << 5;
-                offset += (x & 7) << 2;
-            }
-        } else {
-            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
-            offset += (y * nmicroblkpl) << 5;
-            offset += (x >> 3) << 5;
-            offset += (x & 7) << 2;
-        }
-    }
-    return &ptr[offset];
-}
-
-static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
-                             GLint x, GLint y)
-{
-    GLubyte *ptr = rrb->bo->ptr;
-    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-    GLint offset;
-    GLint nmacroblkpl;
-    GLint nmicroblkpl;
-
-    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-        offset = x * rrb->cpp + y * rrb->pitch;
-    } else {
-        offset = 0;
-        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-                nmacroblkpl = rrb->pitch >> 6;
-                offset += ((y >> 4) * nmacroblkpl) << 11;
-                offset += ((y & 15) >> 1) << 8;
-                offset += (y & 1) << 4;
-                offset += (x >> 6) << 11;
-                offset += ((x & 63) >> 3) << 5;
-                offset += (x & 7) << 1;
-            } else {
-                nmacroblkpl = rrb->pitch >> 7;
-                offset += ((y >> 3) * nmacroblkpl) << 11;
-                offset += (y & 7) << 8;
-                offset += (x >> 7) << 11;
-                offset += ((x & 127) >> 4) << 5;
-                offset += (x & 15) << 2;
-            }
-        } else {
-            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
-            offset += (y * nmicroblkpl) << 5;
-            offset += (x >> 4) << 5;
-            offset += (x & 15) << 2;
-        }
-    }
-    return &ptr[offset];
-}
-
-static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
-                           GLint x, GLint y)
-{
-    GLubyte *ptr = rrb->bo->ptr;
-    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-    GLint offset;
-    GLint microblkxs;
-    GLint macroblkxs;
-    GLint nmacroblkpl;
-    GLint nmicroblkpl;
-
-    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-        offset = x * rrb->cpp + y * rrb->pitch;
-    } else {
-        offset = 0;
-        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-                microblkxs = 16 / rrb->cpp;
-                macroblkxs = 128 / rrb->cpp;
-                nmacroblkpl = rrb->pitch / macroblkxs;
-                offset += ((y >> 4) * nmacroblkpl) << 11;
-                offset += ((y & 15) >> 1) << 8;
-                offset += (y & 1) << 4;
-                offset += (x / macroblkxs) << 11;
-                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
-                offset += (x & (microblkxs - 1)) * rrb->cpp;
-            } else {
-                microblkxs = 32 / rrb->cpp;
-                macroblkxs = 256 / rrb->cpp;
-                nmacroblkpl = rrb->pitch / macroblkxs;
-                offset += ((y >> 3) * nmacroblkpl) << 11;
-                offset += (y & 7) << 8;
-                offset += (x / macroblkxs) << 11;
-                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
-                offset += (x & (microblkxs - 1)) * rrb->cpp;
-            }
-        } else {
-            microblkxs = 32 / rrb->cpp;
-            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
-            offset += (y * nmicroblkpl) << 5;
-            offset += (x / microblkxs) << 5;
-            offset += (x & (microblkxs - 1)) * rrb->cpp;
-        }
-    }
-    return &ptr[offset];
-}
-
 /* ================================================================
  * Color buffer
  */
@@ -345,96 +218,6 @@ do {									\
 #define TAG(x) radeon##x##_z24_s8
 #include "stenciltmp.h"
 
-static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
-{
-	struct radeon_renderbuffer *rrb = (void*)rb;
-    int r;
-
-	if (rrb->bo) {
-        r = radeon_bo_map(rrb->bo, write);
-        if (r) {
-            fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
-                    __FUNCTION__, r);
-        }
-    }
-}
-
-static void unmap_buffer(struct gl_renderbuffer *rb)
-{
-	struct radeon_renderbuffer *rrb = (void*)rb;
-
-	if (rrb->bo) {
-        radeon_bo_unmap(rrb->bo);
-    }
-}
-
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above).  WaitForIdle() is the main
- * culprit.
- */
-
-static void radeonSpanRenderStart(GLcontext * ctx)
-{
-	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-	int i;
-#ifdef COMPILE_R300
-	r300ContextPtr r300 = (r300ContextPtr) rmesa;
-	R300_FIREVERTICES(r300);
-#else
-	RADEON_FIREVERTICES(rmesa);
-#endif
-
-	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
-		if (ctx->Texture.Unit[i]._ReallyEnabled)
-			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
-	}
-
-	/* color draw buffers */
-	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
-		map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
-    }
-
-	map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);
-
-	if (ctx->DrawBuffer->_DepthBuffer) {
-		map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);
-    }
-	if (ctx->DrawBuffer->_StencilBuffer)
-		map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);
-
-	/* The locking and wait for idle should really only be needed in classic mode.
-	 * In a future memory manager based implementation, this should become
-	 * unnecessary due to the fact that mapping our buffers, textures, etc.
-	 * should implicitly wait for any previous rendering commands that must
-	 * be waited on. */
-	LOCK_HARDWARE(rmesa);
-	radeonWaitForIdleLocked(rmesa);
-}
-
-static void radeonSpanRenderFinish(GLcontext * ctx)
-{
-	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-	int i;
-	_swrast_flush(ctx);
-	UNLOCK_HARDWARE(rmesa);
-
-	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
-		if (ctx->Texture.Unit[i]._ReallyEnabled)
-			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
-	}
-
-	/* color draw buffers */
-	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
-		unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
-
-	unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);
-
-	if (ctx->DrawBuffer->_DepthBuffer)
-		unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
-	if (ctx->DrawBuffer->_StencilBuffer)
-		unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
-}
-
 void radeonInitSpanFuncs(GLcontext * ctx)
 {
 	struct swrast_device_driver *swdd =
diff --git a/src/mesa/drivers/dri/radeon/common_misc.c b/src/mesa/drivers/dri/radeon/common_misc.c
index 94d2e6c..99ca936 100644
--- a/src/mesa/drivers/dri/radeon/common_misc.c
+++ b/src/mesa/drivers/dri/radeon/common_misc.c
@@ -2110,3 +2110,214 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t
 
 	return GL_TRUE;
 }
+
+/* Return the address of 32-bit pixel (x, y) inside rrb's buffer object, for linear or tiled layouts. */
+GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
+		      GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;	/* base address; presumably only valid while the bo is mapped */
+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+    GLint offset;			/* byte offset of the pixel from ptr */
+    GLint nmacroblkpl;			/* macro blocks per line */
+    GLint nmicroblkpl;			/* micro blocks per line */
+
+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+        offset = x * rrb->cpp + y * rrb->pitch;	/* no tiling to undo: plain row-major addressing */
+    } else {
+        offset = 0;
+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+                nmacroblkpl = rrb->pitch >> 5;		/* macro block: 32 pixels x 16 rows = 2 KiB */
+                offset += ((y >> 4) * nmacroblkpl) << 11;
+                offset += ((y & 15) >> 1) << 8;	/* each row pair takes 256 bytes */
+                offset += (y & 1) << 4;		/* odd row: second 16-byte half of a micro block */
+                offset += (x >> 5) << 11;
+                offset += ((x & 31) >> 2) << 5;	/* 32-byte micro block holds 4 pixels per row */
+                offset += (x & 3) << 2;		/* 4 bytes per pixel */
+            } else {
+                nmacroblkpl = rrb->pitch >> 6;		/* macro block: 64 pixels x 8 rows = 2 KiB */
+                offset += ((y >> 3) * nmacroblkpl) << 11;
+                offset += (y & 7) << 8;		/* each row takes 256 bytes */
+                offset += (x >> 6) << 11;
+                offset += ((x & 63) >> 3) << 5;	/* 32-byte group of 8 pixels */
+                offset += (x & 7) << 2;		/* 4 bytes per pixel */
+            }
+        } else {
+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;	/* pitch rounded up to 32, in 32-byte units */
+            offset += (y * nmicroblkpl) << 5;
+            offset += (x >> 3) << 5;			/* 32-byte group of 8 pixels */
+            offset += (x & 7) << 2;			/* 4 bytes per pixel */
+        }
+    }
+    return &ptr[offset];
+}
+/* Return the address of 16-bit pixel (x, y) inside rrb's buffer object, for linear or tiled layouts. */
+GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
+		      GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;	/* base address; presumably only valid while the bo is mapped */
+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+    GLint offset;			/* byte offset of the pixel from ptr */
+    GLint nmacroblkpl;			/* macro blocks per line */
+    GLint nmicroblkpl;			/* micro blocks per line */
+
+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+        offset = x * rrb->cpp + y * rrb->pitch;	/* no tiling to undo: plain row-major addressing */
+    } else {
+        offset = 0;
+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+                nmacroblkpl = rrb->pitch >> 6;		/* macro block: 64 pixels x 16 rows = 2 KiB */
+                offset += ((y >> 4) * nmacroblkpl) << 11;
+                offset += ((y & 15) >> 1) << 8;	/* each row pair takes 256 bytes */
+                offset += (y & 1) << 4;		/* odd row: second 16-byte half of a micro block */
+                offset += (x >> 6) << 11;
+                offset += ((x & 63) >> 3) << 5;	/* 32-byte micro block holds 8 pixels per row */
+                offset += (x & 7) << 1;		/* 2 bytes per pixel */
+            } else {
+                nmacroblkpl = rrb->pitch >> 7;		/* macro block: 128 pixels x 8 rows = 2 KiB */
+                offset += ((y >> 3) * nmacroblkpl) << 11;
+                offset += (y & 7) << 8;		/* each row takes 256 bytes */
+                offset += (x >> 7) << 11;
+                offset += ((x & 127) >> 4) << 5;	/* 32-byte group of 16 pixels */
+                offset += (x & 15) << 1;		/* 2 bytes per pixel; << 2 would spill into the next group */
+            }
+        } else {
+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;	/* pitch rounded up to 32, in 32-byte units */
+            offset += (y * nmicroblkpl) << 5;
+            offset += (x >> 4) << 5;			/* 32-byte group of 16 pixels */
+            offset += (x & 15) << 1;			/* 2 bytes per pixel; << 2 would spill into the next group */
+        }
+    }
+    return &ptr[offset];
+}
+/* Return the address of pixel (x, y) inside rrb's buffer object; pixel size is taken from rrb->cpp. */
+GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
+		    GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;	/* base address; presumably only valid while the bo is mapped */
+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+    GLint offset;			/* byte offset of the pixel from ptr */
+    GLint microblkxs;			/* micro block width in pixels */
+    GLint macroblkxs;			/* macro block width in pixels */
+    GLint nmacroblkpl;			/* macro blocks per line */
+    GLint nmicroblkpl;			/* micro blocks per line */
+
+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+        offset = x * rrb->cpp + y * rrb->pitch;	/* no tiling to undo: plain row-major addressing */
+    } else {
+        offset = 0;
+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+                microblkxs = 16 / rrb->cpp;		/* 16 bytes of one row per micro block */
+                macroblkxs = 128 / rrb->cpp;		/* 128 bytes of one row per macro block */
+                nmacroblkpl = rrb->pitch / macroblkxs;
+                offset += ((y >> 4) * nmacroblkpl) << 11;	/* 16 rows per 2 KiB macro block */
+                offset += ((y & 15) >> 1) << 8;	/* each row pair takes 256 bytes */
+                offset += (y & 1) << 4;		/* odd row: second 16-byte half of a micro block */
+                offset += (x / macroblkxs) << 11;
+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;	/* mask assumes a power-of-two width */
+                offset += (x & (microblkxs - 1)) * rrb->cpp;
+            } else {
+                microblkxs = 32 / rrb->cpp;		/* 32 bytes of one row per group */
+                macroblkxs = 256 / rrb->cpp;		/* 256 bytes of one row per macro block */
+                nmacroblkpl = rrb->pitch / macroblkxs;
+                offset += ((y >> 3) * nmacroblkpl) << 11;	/* 8 rows per 2 KiB macro block */
+                offset += (y & 7) << 8;		/* each row takes 256 bytes */
+                offset += (x / macroblkxs) << 11;
+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;	/* mask assumes a power-of-two width */
+                offset += (x & (microblkxs - 1)) * rrb->cpp;
+            }
+        } else {
+            microblkxs = 32 / rrb->cpp;			/* pixels per 32-byte group */
+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;	/* pitch rounded up to 32, in 32-byte units */
+            offset += (y * nmicroblkpl) << 5;
+            offset += (x / microblkxs) << 5;
+            offset += (x & (microblkxs - 1)) * rrb->cpp;
+        }
+    }
+    return &ptr[offset];
+}
+
+/* Map rb's buffer object (write != 0 requests write access); a failed map is reported on stderr. */
+static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
+{
+	struct radeon_renderbuffer *rrb = (void*)rb;
+	int r;
+	/* rb may be NULL (e.g. a GL_NONE read or draw buffer); there is nothing to map then. */
+	if (rrb && rrb->bo) {
+		r = radeon_bo_map(rrb->bo, write);
+		if (r) {
+			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
+				__FUNCTION__, r);
+		}
+	}
+}
+/* Unmap rb's buffer object; a NULL rb or an rb without a buffer object is ignored. */
+static void unmap_buffer(struct gl_renderbuffer *rb)
+{
+	struct radeon_renderbuffer *rrb = (void*)rb;
+
+	if (rrb && rrb->bo) {
+		radeon_bo_unmap(rrb->bo);
+	}
+}
+/* swrast SpanRenderStart hook: flush vertices, map textures and render buffers, lock and idle the HW. */
+void radeonSpanRenderStart(GLcontext * ctx)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	int i;
+
+	rmesa->vtbl.flush_vertices(rmesa);	/* driver-specific vertex flush, dispatched through the vtbl */
+
+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+		if (ctx->Texture.Unit[i]._ReallyEnabled)	/* only units that are actually in use */
+			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
+	}
+
+	/* color draw buffers */
+	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+		map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
+	}
+
+	map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);	/* read buffer: mapped without write access */
+
+	if (ctx->DrawBuffer->_DepthBuffer) {
+		map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);	/* map the wrapped renderbuffer */
+	}
+	if (ctx->DrawBuffer->_StencilBuffer)
+		map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);	/* map the wrapped renderbuffer */
+
+	/* The locking and wait for idle should really only be needed in classic mode.
+	 * In a future memory manager based implementation, this should become
+	 * unnecessary due to the fact that mapping our buffers, textures, etc.
+	 * should implicitly wait for any previous rendering commands that must
+	 * be waited on. */
+	LOCK_HARDWARE(rmesa);	/* released again in radeonSpanRenderFinish */
+	radeonWaitForIdleLocked(rmesa);
+}
+/* swrast SpanRenderFinish hook: flush swrast, unlock the HW, unmap what radeonSpanRenderStart mapped. */
+void radeonSpanRenderFinish(GLcontext * ctx)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	int i;
+	_swrast_flush(ctx);	/* flush swrast before the lock and the mappings go away */
+	UNLOCK_HARDWARE(rmesa);	/* pairs with LOCK_HARDWARE in radeonSpanRenderStart */
+
+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+		if (ctx->Texture.Unit[i]._ReallyEnabled)	/* same condition as used when mapping */
+			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
+	}
+
+	/* color draw buffers */
+	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
+		unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
+
+	unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);	/* color read buffer */
+
+	if (ctx->DrawBuffer->_DepthBuffer)
+		unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);	/* unmap the wrapped renderbuffer */
+	if (ctx->DrawBuffer->_StencilBuffer)
+		unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);	/* unmap the wrapped renderbuffer */
+}
+
diff --git a/src/mesa/drivers/dri/radeon/common_misc.h b/src/mesa/drivers/dri/radeon/common_misc.h
index 470a3fd..aeff52a 100644
--- a/src/mesa/drivers/dri/radeon/common_misc.h
+++ b/src/mesa/drivers/dri/radeon/common_misc.h
@@ -110,5 +110,12 @@ void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
 			 struct gl_texture_object *texObj,
 			 struct gl_texture_image *texImage);
 
-
+void radeonSpanRenderStart(GLcontext * ctx);	/* swrast hook: map buffers/textures, lock and idle the HW */
+void radeonSpanRenderFinish(GLcontext * ctx);	/* swrast hook: flush swrast, unlock, unmap */
+GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
+		    GLint x, GLint y);		/* address of pixel (x, y), pixel size from rrb->cpp */
+GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
+		    GLint x, GLint y);		/* address of 16-bit pixel (x, y) */
+GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
+		    GLint x, GLint y);		/* address of 32-bit pixel (x, y) */
 #endif
-- 
2.7.4