MFC: optimize the MFC batchbuffer shader
author Xiang, Haihao <haihao.xiang@intel.com>
Thu, 15 Mar 2012 06:46:32 +0000 (14:46 +0800)
committer Xiang, Haihao <haihao.xiang@intel.com>
Thu, 15 Mar 2012 06:47:24 +0000 (14:47 +0800)
src/gen6_mfc.c
src/shaders/utils/mfc_batchbuffer_avc_inter.asm
src/shaders/utils/mfc_batchbuffer_avc_inter.g6b
src/shaders/utils/mfc_batchbuffer_avc_inter.g7b
src/shaders/utils/mfc_batchbuffer_avc_intra.asm
src/shaders/utils/mfc_batchbuffer_avc_intra.g6b
src/shaders/utils/mfc_batchbuffer_avc_intra.g7b

index e1a1a46..21d828b 100644 (file)
@@ -40,6 +40,8 @@
 #include "gen6_mfc.h"
 #include "gen6_vme.h"
 
+#define CMD_LEN_IN_OWORD        4
+
 static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = {
 #include "shaders/utils/mfc_batchbuffer_avc_intra.g6b"
 };
@@ -1238,7 +1240,7 @@ gen6_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 2 + 1;
-    mfc_context->mfc_batchbuffer_surface.size_block = 48; /* 3 OWORDs */
+    mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* CMD_LEN_IN_OWORD OWORDs, 16 bytes each */
     mfc_context->mfc_batchbuffer_surface.pitch = 16;
     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
                                                            "MFC batchbuffer",
@@ -1374,7 +1376,7 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
     int total_mbs = slice_param->number_of_mbs;
-    int number_mb_cmds = 512;
+    int number_mb_cmds = 128;
     int starting_mb = 0;
     int last_object = 0;
     int first_object = 1;
@@ -1415,7 +1417,7 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
             batchbuffer_offset += tail_size;
         }
 
-        batchbuffer_offset += number_mb_cmds * 3;
+        batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
 
         first_object = 0;
     }
@@ -1553,7 +1555,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                                            qp,
                                            last_slice);
 
-    return head_size + tail_size + pSliceParameter->number_of_mbs * 3;
+    return head_size + tail_size + pSliceParameter->number_of_mbs * CMD_LEN_IN_OWORD;
 }
 
 static void
index c98f9ed..e6be62f 100644 (file)
@@ -58,6 +58,7 @@ __PAK_OBJECT_LOOP:
 
         mov             (8)     msg_reg0.0<1>:ud                tmp_mfc_batchbuffer<8,8,1>:ud {align1} ;
         mov             (8)     msg_reg1.0<1>:ud                pak_object_ud<8,8,1>:ud {align1} ;
+        mov             (8)     msg_reg2.0<1>:ud                pak_object8_ud<8,8,1>:ud {align1} ;
         
 send (16)
         msg_ind
@@ -66,41 +67,18 @@ send (16)
         data_port(
                 OB_CACHE_TYPE,
                 OB_WRITE,
-                OB_CONTROL_2,
+                OB_CONTROL_3,
                 BIND_IDX_MFC_BATCHBUFFER,
                 OB_WRITE_COMMIT_CATEGORY,
                 OB_HEADER_PRESENT
         )
-        mlen 2
+        mlen 3
         rlen ob_write_wb_length
         {align1};
 
         /* the new offset */
-        add             (1)     tmp_mfc_batchbuffer.8<1>:ud     tmp_mfc_batchbuffer.8<0,1,0>:ud 2:ud {align1} ;
-
-        mov             (8)     msg_reg0.0<1>:ud                tmp_mfc_batchbuffer<8,8,1>:ud {align1} ;
-        mov             (4)     msg_reg1.0<1>:ud                pak_object8_ud<4,4,1>:ud {align1} ;
-
-send (16)
-        msg_ind
-        ob_write_wb
-        null
-        data_port(
-                OB_CACHE_TYPE,
-                OB_WRITE,
-                OB_CONTROL_0,
-                BIND_IDX_MFC_BATCHBUFFER,
-                OB_WRITE_COMMIT_CATEGORY,
-                OB_HEADER_PRESENT
-        )
-        mlen 2
-        rlen ob_write_wb_length
-        {align1};
-
-        /* the new offset */
-        /* DW2 */
+        add             (1)     tmp_mfc_batchbuffer.8<1>:ud     tmp_mfc_batchbuffer.8<0,1,0>:ud 4:ud {align1} ;
         add             (1)     pak_object2_ud<1>:ud            pak_object2_ud<0,1,0>:ud        MFC_AVC_PAK_OBJECT_INTER_DW2 {align1} ;
-        add             (1)     tmp_mfc_batchbuffer.8<1>:ud     tmp_mfc_batchbuffer.8<0,1,0>:ud 1:ud {align1} ;
         
         add.z.f0.0      (1)    total_mbs<1>:w                  total_mbs<0,1,0>:w              -1:w {align1};
         (-f0.0)jmpi     (1)     __PAK_OBJECT_LOOP ;
index 3885c2b..dee64c6 100644 (file)
    { 0x00000040, 0x23584421, 0x00000358, 0x000000b6 },
    { 0x00600001, 0x20000022, 0x008d0140, 0x00000000 },
    { 0x00600001, 0x20200022, 0x008d0340, 0x00000000 },
-   { 0x05800031, 0x23001cdd, 0x00000000, 0x041b0202 },
-   { 0x00000040, 0x21480c21, 0x00000148, 0x00000002 },
-   { 0x00600001, 0x20000022, 0x008d0140, 0x00000000 },
-   { 0x00400001, 0x20200022, 0x00690360, 0x00000000 },
-   { 0x05800031, 0x23001cdd, 0x00000000, 0x041b0002 },
+   { 0x00600001, 0x20400022, 0x008d0360, 0x00000000 },
+   { 0x05800031, 0x23001cdd, 0x00000000, 0x061b0302 },
+   { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
    { 0x00000040, 0x23480c21, 0x00000348, 0x00000040 },
-   { 0x00000040, 0x21480c21, 0x00000148, 0x00000001 },
    { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
-   { 0x00110020, 0x34001c00, 0x00001400, 0xffffffd6 },
+   { 0x00110020, 0x34001c00, 0x00001400, 0xffffffdc },
    { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
    { 0x00600001, 0x20000022, 0x008d0120, 0x00000000 },
    { 0x05800031, 0x22001cc9, 0x00000000, 0x021a0001 },
index e8419fd..f0e5fa7 100644 (file)
    { 0x00000040, 0x23584421, 0x00000358, 0x000000b6 },
    { 0x00600001, 0x28000021, 0x008d0140, 0x00000000 },
    { 0x00600001, 0x28200021, 0x008d0340, 0x00000000 },
-   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0202 },
-   { 0x00000040, 0x21480c21, 0x00000148, 0x00000002 },
-   { 0x00600001, 0x28000021, 0x008d0140, 0x00000000 },
-   { 0x00400001, 0x28200021, 0x00690360, 0x00000000 },
-   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0002 },
+   { 0x00600001, 0x28400021, 0x008d0360, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x060a0302 },
+   { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
    { 0x00000040, 0x23480c21, 0x00000348, 0x00000040 },
-   { 0x00000040, 0x21480c21, 0x00000148, 0x00000001 },
    { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
-   { 0x00110020, 0x34001c00, 0x00001400, 0xffffffd6 },
+   { 0x00110020, 0x34001c00, 0x00001400, 0xffffffdc },
    { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
    { 0x00600001, 0x28000021, 0x008d0120, 0x00000000 },
    { 0x0a800031, 0x22001ca9, 0x00000800, 0x02180001 },
index 95a5842..838ab13 100644 (file)
@@ -82,6 +82,7 @@ send (16)
         
         mov             (8)     msg_reg0.0<1>:ud                tmp_mfc_batchbuffer<8,8,1>:ud {align1} ;
         mov             (8)     msg_reg1.0<1>:ud                pak_object_ud<8,8,1>:ud {align1} ;
+        mov             (8)     msg_reg2.0<1>:ud                pak_object8_ud<8,8,1>:ud {align1} ;        
 
         /* the new offset */
         add             (1)     tmp_vme_output.8<1>:ud          tmp_vme_output.8<0,1,0>:ud      1:ud {align1} ;
@@ -93,40 +94,18 @@ send (16)
         data_port(
                 OB_CACHE_TYPE,
                 OB_WRITE,
-                OB_CONTROL_2,
+                OB_CONTROL_3,
                 BIND_IDX_MFC_BATCHBUFFER,
                 OB_WRITE_COMMIT_CATEGORY,
                 OB_HEADER_PRESENT
         )
-        mlen 2
+        mlen 3
         rlen ob_write_wb_length
         {align1};
 
         /* the new offset */
-        add             (1)     tmp_mfc_batchbuffer.8<1>:ud     tmp_mfc_batchbuffer.8<0,1,0>:ud 2:ud {align1} ;
+        add             (1)     tmp_mfc_batchbuffer.8<1>:ud     tmp_mfc_batchbuffer.8<0,1,0>:ud 4:ud {align1} ;
 
-        mov             (8)     msg_reg0.0<1>:ud                tmp_mfc_batchbuffer<8,8,1>:ud {align1} ;
-        mov             (4)     msg_reg1.0<1>:ud                pak_object8_ud<4,4,1>:ud {align1} ;
-
-send (16)
-        msg_ind
-        ob_write_wb
-        null
-        data_port(
-                OB_CACHE_TYPE,
-                OB_WRITE,
-                OB_CONTROL_0,
-                BIND_IDX_MFC_BATCHBUFFER,
-                OB_WRITE_COMMIT_CATEGORY,
-                OB_HEADER_PRESENT
-        )
-        mlen 2
-        rlen ob_write_wb_length
-        {align1};
-
-        /* the new offset */
-        add             (1)     tmp_mfc_batchbuffer.8<1>:ud     tmp_mfc_batchbuffer.8<0,1,0>:ud    1:ud {align1} ;
-        
         add.z.f0.0      (1)    total_mbs<1>:w                  total_mbs<0,1,0>:w      -1:w {align1};
         (-f0.0)jmpi     (1)     __PAK_OBJECT_LOOP ;
         
index c63178d..3222bd6 100644 (file)
    { 0x00000005, 0x23640c21, 0x0000020c, 0x000000fc },
    { 0x00600001, 0x20000022, 0x008d0140, 0x00000000 },
    { 0x00600001, 0x20200022, 0x008d0340, 0x00000000 },
+   { 0x00600001, 0x20400022, 0x008d0360, 0x00000000 },
    { 0x00000040, 0x21080c21, 0x00000108, 0x00000001 },
-   { 0x05800031, 0x23001cdd, 0x00000000, 0x041b0202 },
-   { 0x00000040, 0x21480c21, 0x00000148, 0x00000002 },
-   { 0x00600001, 0x20000022, 0x008d0140, 0x00000000 },
-   { 0x00400001, 0x20200022, 0x00690360, 0x00000000 },
-   { 0x05800031, 0x23001cdd, 0x00000000, 0x041b0002 },
-   { 0x00000040, 0x21480c21, 0x00000148, 0x00000001 },
+   { 0x05800031, 0x23001cdd, 0x00000000, 0x061b0302 },
+   { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
    { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
-   { 0x00110020, 0x34001c00, 0x00001400, 0xffffffc8 },
+   { 0x00110020, 0x34001c00, 0x00001400, 0xffffffce },
    { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
    { 0x00600001, 0x20000022, 0x008d0120, 0x00000000 },
    { 0x05800031, 0x22001cc9, 0x00000000, 0x021a0001 },
index 8d9a08a..2bf1612 100644 (file)
    { 0x00000005, 0x23640c21, 0x0000020c, 0x000000fc },
    { 0x00600001, 0x28000021, 0x008d0140, 0x00000000 },
    { 0x00600001, 0x28200021, 0x008d0340, 0x00000000 },
+   { 0x00600001, 0x28400021, 0x008d0360, 0x00000000 },
    { 0x00000040, 0x21080c21, 0x00000108, 0x00000001 },
-   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0202 },
-   { 0x00000040, 0x21480c21, 0x00000148, 0x00000002 },
-   { 0x00600001, 0x28000021, 0x008d0140, 0x00000000 },
-   { 0x00400001, 0x28200021, 0x00690360, 0x00000000 },
-   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0002 },
-   { 0x00000040, 0x21480c21, 0x00000148, 0x00000001 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x060a0302 },
+   { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
    { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
-   { 0x00110020, 0x34001c00, 0x00001400, 0xffffffc8 },
+   { 0x00110020, 0x34001c00, 0x00001400, 0xffffffce },
    { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
    { 0x00600001, 0x28000021, 0x008d0120, 0x00000000 },
    { 0x0a800031, 0x22001ca9, 0x00000800, 0x02180001 },