Fix reading of neighbouring macroblock pixels during VME prediction on Sandy Bridge/Ivy Bridge
author Zhao Yakui <yakui.zhao@intel.com>
Wed, 9 Jan 2013 04:29:59 +0000 (12:29 +0800)
committer Xiang, Haihao <haihao.xiang@intel.com>
Thu, 17 Jan 2013 05:08:40 +0000 (13:08 +0800)
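Without this fix, an incorrect VME prediction result is used. When the macroblock column wraps back to 0, the kernel now also advances the macroblock row, and it recomputes the neighbour-read headers and the VME source position from the updated (x, y).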

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
src/shaders/vme/inter_frame.asm
src/shaders/vme/inter_frame.g6b
src/shaders/vme/inter_frame.g7b
src/shaders/vme/mpeg2_inter_frame.g7b

index ce4bd11..e1b6e68 100644 (file)
@@ -239,6 +239,7 @@ add             (1)     vme_m0.0<1>:W           vme_m0.0<0,1,0>:W
 cmp.e.f0.0      (1)     null<1>:uw              w_in_mb_uw<0,1,0>:uw            orig_x_ub<0,1,0>:ub {align1};
 /* (0, y + 1) */        
 (f0.0)mov       (1)     orig_x_ub<1>:ub         0:uw {align1} ;
+(f0.0)add       (1)     orig_y_ub<1>:ub orig_y_ub<0,1,0>:ub             1:uw {align1} ;
 (f0.0)mov       (1)     vme_m0.8<1>:uw          0:uw {align1} ;
 (f0.0)add       (1)     vme_m0.10<1>:uw         vme_m0.10<0,1,0>:uw             16:uw {align1} ;
 #ifdef DEV_SNB        
@@ -246,6 +247,15 @@ cmp.e.f0.0      (1)     null<1>:uw              w_in_mb_uw<0,1,0>:uw
 (f0.0)add       (1)     vme_m0.2<1>:w           vme_m0.2<0,1,0>:w               16:w {align1};
 #endif
 
+shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
+add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */ 
+
+shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
+
+shl  (2) vme_m0.8<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* Source =  (x, y) * 16 */
+
 add             (1)     obw_m0.8<1>:UD          obw_m0.8<0,1,0>:UD              INTER_VME_OUTPUT_IN_OWS:UW {align1} ;
         
 add.z.f0.1      (1)     num_macroblocks<1>:w    num_macroblocks<0,1,0>:w        -1:w {align1} ;
index ea66788..ca251bb 100644 (file)
    { 0x00000040, 0x24403dad, 0x00000440, 0x00100010 },
    { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 },
    { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 },
    { 0x00010001, 0x24480169, 0x00000000, 0x00000000 },
    { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
    { 0x00010001, 0x244001ed, 0x00000000, 0xfff0fff0 },
    { 0x00010040, 0x24423dad, 0x00000442, 0x00100010 },
+   { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+   { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+   { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+   { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
    { 0x00000040, 0x24882c21, 0x00000488, 0x000a000a },
    { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
-   { 0x00110020, 0x34001c00, 0x02001400, 0xffffff5e },
+   { 0x00110020, 0x34001c00, 0x02001400, 0xffffff50 },
    { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
    { 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 },
index 4601ba8..5273200 100644 (file)
    { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 },
    { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 },
    { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 },
    { 0x00010001, 0x24480169, 0x00000000, 0x00000000 },
    { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
+   { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+   { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+   { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+   { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
    { 0x00000040, 0x24882c21, 0x00000488, 0x000a000a },
    { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
-   { 0x00110020, 0x34001c00, 0x02001400, 0xffffff74 },
+   { 0x00110020, 0x34001c00, 0x02001400, 0xffffff66 },
    { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
    { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 },
index 398c288..40aeb3f 100644 (file)
    { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 },
    { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 },
    { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 },
    { 0x00010001, 0x24480169, 0x00000000, 0x00000000 },
    { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
+   { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+   { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+   { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+   { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
    { 0x00000040, 0x24882c21, 0x00000488, 0x000a000a },
    { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
-   { 0x00110020, 0x34001c00, 0x02001400, 0xffffff74 },
+   { 0x00110020, 0x34001c00, 0x02001400, 0xffffff66 },
    { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
    { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 },