Expand the VME output buffer for Inter frame
author Xiang, Haihao <haihao.xiang@intel.com>
Thu, 5 Apr 2012 07:49:42 +0000 (15:49 +0800)
committer Xiang, Haihao <haihao.xiang@intel.com>
Fri, 6 Apr 2012 08:07:02 +0000 (16:07 +0800)
Prepare for 32 MVs (128 bytes) and other information (32 bytes) from VME. In addition, use
macros instead of magic numbers.

Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>

src/gen6_mfc.c
src/gen6_vme.c
src/gen6_vme.h
src/shaders/utils/mfc_batchbuffer.inc
src/shaders/utils/mfc_batchbuffer_avc_inter.asm
src/shaders/utils/mfc_batchbuffer_avc_inter.g6b
src/shaders/utils/mfc_batchbuffer_avc_inter.g7b
src/shaders/vme/inter_frame.asm
src/shaders/vme/inter_frame.g6b
src/shaders/vme/inter_frame.g7b
src/shaders/vme/vme.inc

index 5ae745a..6d40748 100644 (file)
@@ -1138,9 +1138,9 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     if (is_intra) {
         dri_bo_map(vme_context->vme_output.bo , 1);
         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
-        msg += pSliceParameter->macroblock_address * 4;
+        msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
     } else {
-        offset = pSliceParameter->macroblock_address * 64;
+        offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
     }
    
     for (i = pSliceParameter->macroblock_address; 
@@ -1152,10 +1152,10 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
         if (is_intra) {
             assert(msg);
             gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
-            msg += 4;
+            msg += INTRA_VME_OUTPUT_IN_DWS;
         } else {
             gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
-            offset += 64;
+            offset += INTER_VME_OUTPUT_IN_BYTES;
         }
     }
    
index aa53270..f6b4427 100644 (file)
@@ -181,9 +181,9 @@ gen6_vme_output_buffer_setup(VADriverContextP ctx,
     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
 
     if (is_intra)
-        vme_context->vme_output.size_block = 16; /* in bytes */
+        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
     else
-        vme_context->vme_output.size_block = 64; /* in bytes */
+        vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;
 
     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
                                               "VME output buffer",
index f38577d..ad302da 100644 (file)
 
 #include "i965_gpe_utils.h"
 
+#define INTRA_VME_OUTPUT_IN_BYTES       16      /* in bytes */
+#define INTRA_VME_OUTPUT_IN_DWS         (INTRA_VME_OUTPUT_IN_BYTES / 4)
+#define INTER_VME_OUTPUT_IN_BYTES       160     /* the first 128 bytes for MVs and the last 32 bytes for other info */
+#define INTER_VME_OUTPUT_IN_DWS         (INTER_VME_OUTPUT_IN_BYTES / 4)
+
 #define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
 #define MAX_MEDIA_SURFACES_GEN6      34
 
index 9ec96e1..eaf3d33 100644 (file)
@@ -59,6 +59,8 @@ define(`OB_WRITE_COMMIT_CATEGORY',      `0')    /* category on Ivybridge */
 
 define(`OB_HEADER_PRESENT',             `1')
 
+define(`INTER_VME_OUTPUT_IN_BYTES',     `160')
+
 define(`MFC_AVC_PAK_OBJECT_INTRA_DW0',  `0x71490009:UD')
 define(`MFC_AVC_PAK_OBJECT_INTRA_DW3',  `0x000e0000:UD')        /* CbpDC (1 << 19 | 1 << 18 | 1 << 17) */
 define(`MFC_AVC_PAK_OBJECT_INTRA_DW4',  `0xFFFF0000:UD')        /* CBP for Y */
@@ -67,7 +69,7 @@ define(`MFC_AVC_PAK_OBJECT_INTRA_DW6',  `0x04000000:UD')        /* the flag of t
 
 define(`MFC_AVC_PAK_OBJECT_INTER_DW0',  `MFC_AVC_PAK_OBJECT_INTRA_DW0')
 define(`MFC_AVC_PAK_OBJECT_INTER_DW1',  `0x20:UD')              /* 32 MVs */
-define(`MFC_AVC_PAK_OBJECT_INTER_DW2',  `0x40:UD')              /* offset, in bytes */
+define(`MFC_AVC_PAK_OBJECT_INTER_DW2',  `INTER_VME_OUTPUT_IN_BYTES:UD') /* offset, in bytes */
 define(`MFC_AVC_PAK_OBJECT_INTER_DW3',  `0x014e0120:UD')        /* 
                                                                  * (1 << 24) |     PackedMvNum, Debug
                                                                  * (4 << 20) |     8 MV, SNB don't use it
index e6be62f..3aa917d 100644 (file)
@@ -33,7 +33,7 @@ __PAK_OBJECT:
         /* DW2 */
         mul             (1)     pak_object2_ud<1>:ud            width_in_mb<0,1,0>:uw           mb_y<0,1,0>:ub {align1};
         add             (1)     pak_object2_ud<1>:ud            pak_object2_ud<0,1,0>:ud        mb_x<0,1,0>:ub {align1};
-        shl             (1)     pak_object2_ud<1>:ud            pak_object2_ud<0,1,0>:ud        6:uw {align1} ;
+        mul             (1)     pak_object2_ud<1>:ud            pak_object2_ud<0,1,0>:ud        INTER_VME_OUTPUT_IN_BYTES:ud {align1} ;
         /* DW3 */
         mov             (1)     pak_object3_ud<1>:ud            MFC_AVC_PAK_OBJECT_INTER_DW3 ;
         /* DW5 */
index dee64c6..1e6c807 100644 (file)
@@ -20,7 +20,7 @@
    { 0x00000001, 0x23440061, 0x00000000, 0x00000020 },
    { 0x00000041, 0x23484521, 0x000000b4, 0x000000b1 },
    { 0x00000040, 0x23484421, 0x00000348, 0x000000b0 },
-   { 0x00000009, 0x23482c21, 0x00000348, 0x00060006 },
+   { 0x00000041, 0x23480c21, 0x00000348, 0x000000a0 },
    { 0x00000001, 0x234c0061, 0x00000000, 0x014e0120 },
    { 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
    { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 },
@@ -39,7 +39,7 @@
    { 0x00600001, 0x20400022, 0x008d0360, 0x00000000 },
    { 0x05800031, 0x23001cdd, 0x00000000, 0x061b0302 },
    { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
-   { 0x00000040, 0x23480c21, 0x00000348, 0x00000040 },
+   { 0x00000040, 0x23480c21, 0x00000348, 0x000000a0 },
    { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
    { 0x00110020, 0x34001c00, 0x00001400, 0xffffffdc },
    { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
index f0e5fa7..514da20 100644 (file)
@@ -20,7 +20,7 @@
    { 0x00000001, 0x23440061, 0x00000000, 0x00000020 },
    { 0x00000041, 0x23484521, 0x000000b4, 0x000000b1 },
    { 0x00000040, 0x23484421, 0x00000348, 0x000000b0 },
-   { 0x00000009, 0x23482c21, 0x00000348, 0x00060006 },
+   { 0x00000041, 0x23480c21, 0x00000348, 0x000000a0 },
    { 0x00000001, 0x234c0061, 0x00000000, 0x014e0120 },
    { 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
    { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 },
@@ -39,7 +39,7 @@
    { 0x00600001, 0x28400021, 0x008d0360, 0x00000000 },
    { 0x0a800031, 0x20001cac, 0x00000800, 0x060a0302 },
    { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
-   { 0x00000040, 0x23480c21, 0x00000348, 0x00000040 },
+   { 0x00000040, 0x23480c21, 0x00000348, 0x000000a0 },
    { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
    { 0x00110020, 0x34001c00, 0x00001400, 0xffffffdc },
    { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
index 667119e..16fb1c6 100644 (file)
@@ -44,7 +44,7 @@ mov  (1) vme_m1.8<1>:UD         START_CENTER + SEARCH_PATH_LEN:UD {align1};
 
 mul  (1) obw_m0.8<1>:UD         w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
 add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
-shl  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x2:UD {align1};
+mul  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};
 mov  (1) obw_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
         
 /*
@@ -129,7 +129,7 @@ cmp.e.f0.0      (1)     null<1>:uw              w_in_mb_uw<0,1,0>:uw
 (f0.0)add       (1)     vme_m0.2<1>:w           vme_m0.2<0,1,0>:w               16:w {align1};
 #endif
 
-add             (1)     obw_m0.8<1>:UD          obw_m0.8<0,1,0>:UD              4:UW {align1} ;    /* offset += 4 */
+add             (1)     obw_m0.8<1>:UD          obw_m0.8<0,1,0>:UD              INTER_VME_OUTPUT_IN_OWS:UW {align1} ;
         
 add.z.f0.1      (1)     num_macroblocks<1>:w    num_macroblocks<0,1,0>:w        -1:w {align1} ;
 (-f0.1)jmpi     (1)     __VME_LOOP ;
index 134f413..bf5844d 100644 (file)
@@ -13,7 +13,7 @@
    { 0x00000001, 0x24680061, 0x00000000, 0x30003f0c },
    { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
    { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
-   { 0x00000009, 0x24880c21, 0x00000488, 0x00000002 },
+   { 0x00000041, 0x24880c21, 0x00000488, 0x0000000a },
    { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
    { 0x00600001, 0x20000022, 0x008d0440, 0x00000000 },
    { 0x00600001, 0x20200022, 0x008d0460, 0x00000000 },
@@ -37,7 +37,7 @@
    { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
    { 0x00010001, 0x244001ed, 0x00000000, 0xfff0fff0 },
    { 0x00010040, 0x24423dad, 0x00000442, 0x00100010 },
-   { 0x00000040, 0x24882c21, 0x00000488, 0x00040004 },
+   { 0x00000040, 0x24882c21, 0x00000488, 0x000a000a },
    { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
    { 0x00110020, 0x34001c00, 0x02001400, 0xffffffce },
    { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
index 7257654..06408f4 100644 (file)
@@ -12,7 +12,7 @@
    { 0x00000001, 0x24680061, 0x00000000, 0x30003f0c },
    { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
    { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
-   { 0x00000009, 0x24880c21, 0x00000488, 0x00000002 },
+   { 0x00000041, 0x24880c21, 0x00000488, 0x0000000a },
    { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
    { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
    { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
@@ -31,7 +31,7 @@
    { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 },
    { 0x00010001, 0x24480169, 0x00000000, 0x00000000 },
    { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
-   { 0x00000040, 0x24882c21, 0x00000488, 0x00040004 },
+   { 0x00000040, 0x24882c21, 0x00000488, 0x000a000a },
    { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
    { 0x00110020, 0x34001c00, 0x02001400, 0xffffffd8 },
    { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
index 73a554d..68fcada 100644 (file)
@@ -65,6 +65,9 @@ define(`START_CENTER',                  `0x30000000')
 define(`ADAPTIVE_SEARCH_ENABLE',        `0x00000002') 
 define(`INTRA_PREDICTORE_MODE',         `0x11111111:UD')
 
+define(`INTER_VME_OUTPUT_IN_OWS',       `10')
+define(`INTER_VME_OUTPUT_MV_IN_OWS',    `8')
+
 #ifdef DEV_SNB
 
 define(`OBW_CACHE_TYPE',                `5')