i965: Use Compr4 instruction compression mode on G4X and newer.
authorEric Anholt <eric@anholt.net>
Sat, 7 Nov 2009 01:45:13 +0000 (17:45 -0800)
committerEric Anholt <eric@anholt.net>
Sat, 7 Nov 2009 05:24:22 +0000 (21:24 -0800)
No statistically significant performance difference at n=3 with either
openarena or my GL demo, but cutting program size seems like a good
thing to be doing for the hypothetical app that has a working set near
icache size.

src/mesa/drivers/dri/i965/brw_eu.h
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_wm_emit.c

index 30603bd..39eb88d 100644 (file)
@@ -170,11 +170,11 @@ static INLINE struct brw_reg brw_reg( GLuint file,
                                       GLuint writemask )
 {
    struct brw_reg reg;
-   if (type == BRW_GENERAL_REGISTER_FILE)
+   if (file == BRW_GENERAL_REGISTER_FILE)
       assert(nr < BRW_MAX_GRF);
-   else if (type == BRW_MESSAGE_REGISTER_FILE)
-      assert(nr < BRW_MAX_MRF);
-   else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
+   else if (file == BRW_MESSAGE_REGISTER_FILE)
+      assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+   else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
       assert(nr <= BRW_ARF_IP);
 
    reg.type = type;
@@ -538,7 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
 
 static INLINE struct brw_reg brw_message_reg( GLuint nr )
 {
-   assert(nr < BRW_MAX_MRF);
+   assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
    return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
                       nr,
                       0);
index 241cdc3..7ceabba 100644 (file)
@@ -55,7 +55,8 @@ static void guess_execution_size( struct brw_instruction *insn,
 static void brw_set_dest( struct brw_instruction *insn,
                          struct brw_reg dest )
 {
-   if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
+   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
+       dest.file != BRW_MESSAGE_REGISTER_FILE)
       assert(dest.nr < 128);
 
    insn->bits1.da1.dest_reg_file = dest.file;
index 9cd1fed..eb37ea1 100644 (file)
@@ -1105,6 +1105,7 @@ void emit_fb_write(struct brw_wm_compile *c,
                   GLuint eot)
 {
    struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
    GLuint nr = 2;
    GLuint channel;
 
@@ -1119,18 +1120,28 @@ void emit_fb_write(struct brw_wm_compile *c,
    brw_push_insn_state(p);
 
    for (channel = 0; channel < 4; channel++) {
-      /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
-      /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-      brw_MOV(p,
-             brw_message_reg(nr + channel),
-             arg0[channel]);
-
-      if (c->dispatch_width == 16) {
-        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) {
+        /* By setting the high bit of the MRF register number, we indicate
+         * that we want COMPR4 mode - instead of doing the usual destination
+         * + 1 for the second half we get destination + 4.
+         */
         brw_MOV(p,
-                brw_message_reg(nr + channel + 4),
-                sechalf(arg0[channel]));
+                brw_message_reg(nr + channel + (1 << 7)),
+                arg0[channel]);
+      } else {
+        /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
+        /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
+        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+        brw_MOV(p,
+                brw_message_reg(nr + channel),
+                arg0[channel]);
+
+        if (c->dispatch_width == 16) {
+           brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+           brw_MOV(p,
+                   brw_message_reg(nr + channel + 4),
+                   sechalf(arg0[channel]));
+        }
       }
    }
    /* skip over the regs populated above: