(Stephane Marchesin, me) add hyperz support to radeon and r200 drivers. Only fast...
authorRoland Scheidegger <rscheidegger@gmx.ch>
Wed, 8 Dec 2004 17:32:46 +0000 (17:32 +0000)
committerRoland Scheidegger <rscheidegger@gmx.ch>
Wed, 8 Dec 2004 17:32:46 +0000 (17:32 +0000)
16 files changed:
src/mesa/drivers/dri/common/xmlpool.h
src/mesa/drivers/dri/r200/r200_context.c
src/mesa/drivers/dri/r200/r200_context.h
src/mesa/drivers/dri/r200/r200_ioctl.c
src/mesa/drivers/dri/r200/r200_reg.h
src/mesa/drivers/dri/r200/r200_sanity.c
src/mesa/drivers/dri/r200/r200_screen.c
src/mesa/drivers/dri/r200/r200_state.c
src/mesa/drivers/dri/r200/r200_state_init.c
src/mesa/drivers/dri/radeon/radeon_context.c
src/mesa/drivers/dri/radeon/radeon_context.h
src/mesa/drivers/dri/radeon/radeon_ioctl.c
src/mesa/drivers/dri/radeon/radeon_sanity.c
src/mesa/drivers/dri/radeon/radeon_screen.c
src/mesa/drivers/dri/radeon/radeon_state_init.c
src/mesa/drivers/dri/radeon/server/radeon_reg.h

index efd13a1..311fcd0 100644 (file)
@@ -273,6 +273,14 @@ DRI_CONF_OPT_BEGIN_V(vblank_mode,enum,def,"0:3") \
         DRI_CONF_DESC_END \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_HYPERZ_DISABLED 0
+#define DRI_CONF_HYPERZ_ENABLED 1
+#define DRI_CONF_HYPERZ(def) \
+DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
+        DRI_CONF_DESC(en,"Use hyperz") \
+        DRI_CONF_DESC(de,"Hyperz benutzen") \
+DRI_CONF_OPT_END
+
 #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
 DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
         DRI_CONF_DESC(en,"Number of texture units") \
index e0ede63..4eca4ad 100644 (file)
@@ -62,7 +62,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r200_vtxfmt.h"
 #include "r200_maos.h"
 
-#define DRIVER_DATE    "20041007"
+#define DRIVER_DATE    "20041207"
 
 #include "vblank.h"
 #include "utils.h"
@@ -265,6 +265,14 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
    rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
                                                  "def_max_anisotropy");
 
+   if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
+      if ( sPriv->drmMinor < 13 )
+        fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
+                         "disabling.\n",sPriv->drmMinor );
+      else
+        rmesa->using_hyperz = GL_TRUE;
+   }
+
    /* Init default driver functions then plug in our R200-specific functions
     * (the texture functions are especially important)
     */
index 9af5c93..bb5a3cf 100644 (file)
@@ -102,6 +102,7 @@ struct r200_colorbuffer_state {
 
 
 struct r200_depthbuffer_state {
+   GLuint clear;
    GLfloat scale;
 };
 
@@ -930,6 +931,8 @@ struct r200_context {
    /* Configuration cache
     */
    driOptionCache optionCache;
+
+   GLboolean using_hyperz;
 };
 
 #define R200_CONTEXT(ctx)              ((r200ContextPtr)(ctx->DriverCtx))
index 077afd8..04a00b7 100644 (file)
@@ -610,7 +610,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask, GLboolean all,
    }
 
    if ( mask & DD_DEPTH_BIT ) {
-      if ( ctx->Depth.Mask ) flags |= RADEON_DEPTH; /* FIXME: ??? */
+      flags |= RADEON_DEPTH;
       mask &= ~DD_DEPTH_BIT;
    }
 
@@ -628,6 +628,17 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask, GLboolean all,
    if ( !flags ) 
       return;
 
+   if (rmesa->using_hyperz) {
+      flags |= RADEON_USE_COMP_ZBUF;
+/*      if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200)
+        flags |= RADEON_USE_HIERZ; */
+      if (!(rmesa->state.stencil.hwBuffer) ||
+        ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+           ((rmesa->state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
+         flags |= RADEON_CLEAR_FASTZ;
+      }
+   }
+
    /* Flip top to bottom */
    cx += dPriv->x;
    cy  = dPriv->y + dPriv->h - cy - ch;
@@ -707,7 +718,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask, GLboolean all,
 
       clear.flags       = flags;
       clear.clear_color = rmesa->state.color.clear;
-      clear.clear_depth = 0;   /* not used */
+      clear.clear_depth = rmesa->state.depth.clear;    /* needed for hyperz */
       clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       clear.depth_mask  = rmesa->state.stencil.clear;
       clear.depth_boxes = depth_boxes;
index 89da699..38378a9 100644 (file)
@@ -91,6 +91,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define R200_RB3D_DEPTHOFFSET             0x1c24
 #define R200_RB3D_DEPTHPITCH              0x1c28
 #define     R200_DEPTHPITCH_MASK         0x00001ff8
+#define     R200_DEPTH_HYPERZ            (3 << 16)
 #define     R200_DEPTH_ENDIAN_NO_SWAP    (0 << 18)
 #define     R200_DEPTH_ENDIAN_WORD_SWAP  (1 << 18)
 #define     R200_DEPTH_ENDIAN_DWORD_SWAP (2 << 18)
@@ -112,6 +113,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define     R200_Z_TEST_NEQUAL              (6  <<  4)
 #define     R200_Z_TEST_ALWAYS              (7  <<  4)
 #define     R200_Z_TEST_MASK                (7  <<  4)
+#define     R200_Z_HIERARCHY_ENABLE         (1  <<  8)
 #define     R200_STENCIL_TEST_NEVER         (0  << 12)
 #define     R200_STENCIL_TEST_LESS          (1  << 12)
 #define     R200_STENCIL_TEST_LEQUAL        (2  << 12)
@@ -148,7 +150,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define     R200_STENCIL_ZFAIL_INC_WRAP     (6  << 24)
 #define     R200_STENCIL_ZFAIL_DEC_WRAP     (7  << 24)
 #define     R200_STENCIL_ZFAIL_MASK         (0x7 << 24)
+#define     R200_Z_COMPRESSION_ENABLE       (1  << 28)
+#define     R200_FORCE_Z_DIRTY              (1  << 29)
 #define     R200_Z_WRITE_ENABLE             (1  << 30)
+#define     R200_Z_DECOMPRESSION_ENABLE     (1  << 31)
 /*gap*/
 #define R200_PP_CNTL                      0x1c38 
 #define     R200_TEX_0_ENABLE                         0x00000010
@@ -649,6 +654,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define     R200_CULL_FRONT                     (1<<29)
 #define     R200_CULL_BACK                      (1<<30)
 #define R200_SE_TCL_POINT_SPRITE_CNTL     0x22c4
+#define     R200_POINTSIZE_SEL_STATE            (1<<16)
 /* gap */
 #define R200_SE_VTX_ST_POS_0_X_4                   0x2300
 #define R200_SE_VTX_ST_POS_0_Y_4                   0x2304
index 58d83ad..d45d196 100644 (file)
@@ -143,6 +143,7 @@ static struct {
    { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
    { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
    { R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
+   { R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
 };
 
 struct reg_names {
index 855dd20..4d46acc 100644 (file)
@@ -63,6 +63,7 @@ DRI_CONF_BEGIN
         DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
         DRI_CONF_MAX_TEXTURE_UNITS(4,2,6)
+        DRI_CONF_HYPERZ(false)
     DRI_CONF_SECTION_END
     DRI_CONF_SECTION_QUALITY
         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
@@ -81,7 +82,7 @@ DRI_CONF_BEGIN
         DRI_CONF_NV_VERTEX_PROGRAM(false)
     DRI_CONF_SECTION_END
 DRI_CONF_END;
-static const GLuint __driNConfigOptions = 14;
+static const GLuint __driNConfigOptions = 15;
 
 #if 1
 /* Including xf86PciInfo.h introduces a bunch of errors...
index 7701256..13a951f 100644 (file)
@@ -374,6 +374,21 @@ static void r200DepthFunc( GLcontext *ctx, GLenum func )
    }
 }
 
+static void r200ClearDepth( GLcontext *ctx, GLclampd d )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
+                   R200_DEPTH_FORMAT_MASK);
+
+   switch ( format ) {
+   case R200_DEPTH_FORMAT_16BIT_INT_Z:
+      rmesa->state.depth.clear = d * 0x0000ffff;
+      break;
+   case R200_DEPTH_FORMAT_24BIT_INT_Z:
+      rmesa->state.depth.clear = d * 0x00ffffff;
+      break;
+   }
+}
 
 static void r200DepthMask( GLcontext *ctx, GLboolean flag )
 {
@@ -2315,7 +2330,7 @@ void r200InitStateFuncs( struct dd_function_table *functions )
    functions->BlendEquationSeparate    = r200BlendEquationSeparate;
    functions->BlendFuncSeparate                = r200BlendFuncSeparate;
    functions->ClearColor               = r200ClearColor;
-   functions->ClearDepth               = NULL;
+   functions->ClearDepth               = r200ClearDepth;
    functions->ClearIndex               = NULL;
    functions->ClearStencil             = r200ClearStencil;
    functions->ClipPlane                        = r200ClipPlane;
index 610706b..5d36a68 100644 (file)
@@ -169,14 +169,16 @@ void r200InitState( r200ContextPtr rmesa )
 
    switch ( ctx->Visual.depthBits ) {
    case 16:
+      rmesa->state.depth.clear = 0x0000ffff;
       rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
       depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
       rmesa->state.stencil.clear = 0x00000000;
       break;
    case 24:
+      rmesa->state.depth.clear = 0x00ffffff;
       rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
       depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
-      rmesa->state.stencil.clear = 0xff000000;
+      rmesa->state.stencil.clear = 0xffff0000;
       break;
    default:
       fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
@@ -448,15 +450,25 @@ void r200InitState( r200ContextPtr rmesa )
       ((rmesa->r200Screen->depthPitch &
        R200_DEPTHPITCH_MASK) |
        R200_DEPTH_ENDIAN_NO_SWAP);
+   
+   if (rmesa->using_hyperz)
+      rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ;
 
    rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
-                                              R200_Z_TEST_LESS |  
+                                              R200_Z_TEST_LESS |
                                               R200_STENCIL_TEST_ALWAYS |
                                               R200_STENCIL_FAIL_KEEP |
                                               R200_STENCIL_ZPASS_KEEP |
                                               R200_STENCIL_ZFAIL_KEEP |
                                               R200_Z_WRITE_ENABLE);
 
+   if (rmesa->using_hyperz) {
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
+                                                 R200_Z_DECOMPRESSION_ENABLE;
+/*      if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200)
+        rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
+   }
+
    rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE 
                                     | R200_TEX_BLEND_0_ENABLE);
 
index 5f0e2d1..31ac5f2 100644 (file)
@@ -62,7 +62,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_vtxfmt.h"
 #include "radeon_maos.h"
 
-#define DRIVER_DATE    "20041007"
+#define DRIVER_DATE    "20041207"
 
 #include "vblank.h"
 #include "utils.h"
@@ -246,6 +246,14 @@ radeonCreateContext( const __GLcontextModes *glVisual,
    rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
                                                  "def_max_anisotropy");
 
+   if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
+      if ( sPriv->drmMinor < 13 )
+        fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
+                         "disabling.\n",sPriv->drmMinor );
+      else
+        rmesa->using_hyperz = GL_TRUE;
+   }
+
    /* Init default driver functions then plug in our Radeon-specific functions
     * (the texture functions are especially important)
     */
index b44ea08..b97e58a 100644 (file)
@@ -782,7 +782,8 @@ struct radeon_context {
     */
    driOptionCache optionCache;
 
+   GLboolean using_hyperz;
+
    /* Performance counters
     */
    GLuint boxes;                       /* Draw performance boxes */
index aa982cf..5b758e6 100644 (file)
@@ -1043,7 +1043,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
    }
 
    if ( mask & DD_DEPTH_BIT ) {
-      if ( ctx->Depth.Mask ) flags |= RADEON_DEPTH; /* FIXME: ??? */
+      flags |= RADEON_DEPTH;
       mask &= ~DD_DEPTH_BIT;
    }
 
@@ -1061,6 +1061,16 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
    if ( !flags ) 
       return;
 
+   if (rmesa->using_hyperz) {
+      flags |= RADEON_USE_COMP_ZBUF;
+/*      if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) 
+         flags |= RADEON_USE_HIERZ; */
+      if (!(rmesa->state.stencil.hwBuffer) ||
+        ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+           ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
+         flags |= RADEON_CLEAR_FASTZ;
+      }
+   }
 
    /* Flip top to bottom */
    cx += dPriv->x;
index 4a6d7d1..3a3bbb7 100644 (file)
@@ -139,6 +139,8 @@ static struct {
    { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
    { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
        { 0, 3, "R200_RB3D_BLENDCOLOR" },
+       { 0, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
+
 };
 
 struct reg_names {
index 8346209..d23313d 100644 (file)
@@ -60,6 +60,7 @@ DRI_CONF_BEGIN
         DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
         DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+        DRI_CONF_HYPERZ(false)
     DRI_CONF_SECTION_END
     DRI_CONF_SECTION_QUALITY
         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
@@ -74,7 +75,7 @@ DRI_CONF_BEGIN
         DRI_CONF_NO_RAST(false)
     DRI_CONF_SECTION_END
 DRI_CONF_END;
-static const GLuint __driNConfigOptions = 11;
+static const GLuint __driNConfigOptions = 12;
 
 #if 1
 /* Including xf86PciInfo.h introduces a bunch of errors...
index 0945fa8..1dece86 100644 (file)
@@ -174,7 +174,7 @@ void radeonInitState( radeonContextPtr rmesa )
       rmesa->state.depth.clear = 0x00ffffff;
       rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
       depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
-      rmesa->state.stencil.clear = 0xff000000;
+      rmesa->state.stencil.clear = 0xffff0000;
       break;
    default:
       fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
@@ -329,6 +329,9 @@ void radeonInitState( radeonContextPtr rmesa )
       ((rmesa->radeonScreen->depthPitch &
        RADEON_DEPTHPITCH_MASK) |
        RADEON_DEPTH_ENDIAN_NO_SWAP);
+       
+   if (rmesa->using_hyperz)
+       rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= RADEON_DEPTH_HYPERZ;
 
    rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
                                               RADEON_Z_TEST_LESS |
@@ -338,6 +341,17 @@ void radeonInitState( radeonContextPtr rmesa )
                                               RADEON_STENCIL_ZFAIL_KEEP |
                                               RADEON_Z_WRITE_ENABLE);
 
+   if (rmesa->using_hyperz) {
+       rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE |
+                                                  RADEON_Z_DECOMPRESSION_ENABLE;
+      if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) {
+        /* works for q3, but slight rendering errors with glxgears ? */
+/*      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
+        /* need this otherwise get lots of lockups with q3 ??? */
+        rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_FORCE_Z_DIRTY;
+      } 
+   }
+
    rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (RADEON_SCISSOR_ENABLE |
                                     RADEON_ANTI_ALIAS_NONE);
 
index 4898dbf..a35aa21 100644 (file)
 #define RADEON_RB3D_DEPTHOFFSET             0x1c24
 #define RADEON_RB3D_DEPTHPITCH              0x1c28
 #       define RADEON_DEPTHPITCH_MASK         0x00001ff8
+#       define RADEON_DEPTH_HYPERZ            (3 << 16)
 #       define RADEON_DEPTH_ENDIAN_NO_SWAP    (0 << 18)
 #       define RADEON_DEPTH_ENDIAN_WORD_SWAP  (1 << 18)
 #       define RADEON_DEPTH_ENDIAN_DWORD_SWAP (2 << 18)
 #       define RADEON_Z_TEST_NEQUAL              (6  <<  4)
 #       define RADEON_Z_TEST_ALWAYS              (7  <<  4)
 #       define RADEON_Z_TEST_MASK                (7  <<  4)
+#       define RADEON_Z_HIERARCHY_ENABLE         (1  <<  8)
 #       define RADEON_STENCIL_TEST_NEVER         (0  << 12)
 #       define RADEON_STENCIL_TEST_LESS          (1  << 12)
 #       define RADEON_STENCIL_TEST_LEQUAL        (2  << 12)
 #       define RADEON_Z_COMPRESSION_ENABLE       (1  << 28)
 #       define RADEON_FORCE_Z_DIRTY              (1  << 29)
 #       define RADEON_Z_WRITE_ENABLE             (1  << 30)
+#       define RADEON_Z_DECOMPRESSION_ENABLE     (1  << 31)
 #define RADEON_RE_LINE_PATTERN              0x1cd0
 #       define RADEON_LINE_PATTERN_MASK             0x0000ffff
 #       define RADEON_LINE_REPEAT_COUNT_SHIFT       16