Add MPEG2 VME shader files for IVB
[profile/ivi/vaapi-intel-driver.git] / src / shaders / vme / inter_frame.asm
1 /*
2  * Copyright © <2010>, Intel Corporation.
3  *
4  * This program is licensed under the terms and conditions of the
5  * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
6  * http://www.opensource.org/licenses/eclipse-1.0.php.
7  *
8  */
9 // Module name: inter_frame.asm
10 //
11 // Make inter and intra prediction estimation for Inter frame
12 //
13
14 //
15 //  Now, begin source code....
16 //
17
18 /*
19  * __START
20  */
21 __INTER_START:       /* one-time setup; the per-macroblock loop begins at __VME_LOOP */
22 mov  (16) tmp_reg0.0<1>:UD      0x0:UD {align1};    /* zero tmp_reg0, tmp_reg2 and tmp_reg3 */
23 mov  (16) tmp_reg2.0<1>:UD      0x0:UD {align1};
24 mov  (16) tmp_reg3.0<1>:UD      0x0:UD {align1};
25 /* read0 header: origin (x*16 - 8, y*16 - 1), size BLOCK_32X1; copied to msg_reg0 for the ROW read in the loop */
26 shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
27 add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
28 add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */ 
29 mov  (1) read0_header.8<1>:UD   BLOCK_32X1 {align1};
30 mov  (1) read0_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
31 /* read1 header: origin (x*16 - 4, y*16), size BLOCK_4X16; copied to msg_reg0 for the COL read in the loop */
32 shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
33 add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
34 mov  (1) read1_header.8<1>:UD   BLOCK_4X16 {align1};
35 mov  (1) read1_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
36         
37 shl  (2) vme_m0.8<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* Source =  (x, y) * 16 */
38 /* vme_m0.0 and vme_m0.2: absolute (x*16 - 16, y*16 - 12) under DEV_SNB, the constants (-16, -12) otherwise */
39 #ifdef DEV_SNB
40 shl  (2) vme_m0.0<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};     
41 add  (1) vme_m0.0<1>:W          vme_m0.0<0,1,0>:W -16:W {align1};               /* Reference = (x-16,y-12)-(x+32,y+24) */
42 add  (1) vme_m0.2<1>:W          vme_m0.2<0,1,0>:W -12:W {align1};
43 #else
44 mov  (1) vme_m0.0<1>:W          -16:W {align1} ;                /* Reference = (x-16,y-12)-(x+32,y+24) */
45 mov  (1) vme_m0.2<1>:W          -12:W {align1} ;
46 #endif
47         
48 mov  (1) vme_m0.12<1>:UD        SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1};    /* 16x16 Source, 1/4 pixel, Haar */
49 mov  (1) vme_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
50 mov  (1) vme_m0.22<1>:UW        REF_REGION_SIZE {align1};               /* Reference Width&Height, 32x32 -- NOTE(review): the Reference comment above spans 48x36; confirm against REF_REGION_SIZE */
51
52 mov  (1) vme_m1.0<1>:UD         ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
53 mov  (1) vme_m1.4<1>:UD         FB_PRUNING_ENABLE:UD {align1};          /* whole dword first; the byte at offset 4 is overwritten just below, so the order matters */
54 /* MV num is passed by constant buffer. R4.28 */
55 mov  (1) vme_m1.4<1>:UB         r4.28<0,1,0>:UB {align1};
56 mov  (1) vme_m1.8<1>:UD         START_CENTER + SEARCH_PATH_LEN:UD {align1};
57 /* obw_m0.8 = (w_in_mb * y + x) * INTER_VME_OUTPUT_IN_OWS; obw_m0 is later copied to msg_reg0 as the Oword Block Write header */
58 mul  (1) obw_m0.8<1>:UD         w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
59 add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
60 mul  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};
61 mov  (1) obw_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
62
63 __VME_LOOP:     
64 /* one pass per macroblock; the branch back to this label is at the end of the __OUTPUT_INFO section */
65 /*
66  * Media Read Message -- fetch neighbor edge pixels 
67  */
68 /* ROW: read0 header in, one register back into INEP_ROW */
69 mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};        
70 send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
71
72 /* COL: read1 header in, two registers back starting at INEP_COL0 */
73 mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};                
74 send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
75 /* NOTE(review): no instruction visible here updates read0_header or read1_header after __INTER_START; confirm whether these reads should follow the macroblock position */
76 /*
77  * VME message
78  */
79 /* m0 */
80 mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
81         
82 /* m1 */
83 mov  (1) intra_flag<1>:UW       0x0:UW {align1}                     ;    /* NOTE(review): presumably also clears intra_part_mask_ub and mb_intra_struct_ub, which are only set or accumulated below; confirm against the register definitions */
84 and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};     /* f0.0 set when bit 0 of transform_8x8_ub is clear */
85 (f0.0) mov  (1) intra_part_mask_ub<1>:UB  LUMA_INTRA_8x8_DISABLE {align1};
86 /* neighbor availability: each test below adds one flag to mb_intra_struct_ub when it passes */
87 cmp.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB 0:UW {align1};                                                   /* X != 0 */
88 (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_AE {align1};         /* A */
89
90 cmp.nz.f0.0 (1) null<1>:UW orig_y_ub<0,1,0>:UB 0:UW {align1};                                                   /* Y != 0 */
91 (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_B {align1};          /* B */
92
93 mul.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB orig_y_ub<0,1,0>:UB {align1};                                    /* X * Y != 0 */
94 (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_D {align1};          /* D */
95
96 add  (1) tmp_x_w<1>:W orig_x_ub<0,1,0>:UB 1:UW {align1};                                                        /* X + 1 */
97 add  (1) tmp_x_w<1>:W w_in_mb_uw<0,1,0>:UW -tmp_x_w<0,1,0>:W {align1};                                          /* width - (X + 1) */
98 mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1};                                       /* (width - (X + 1)) * Y != 0 */
99 (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1};          /* C */
100
101 and.nz.f0.0 (1) null<1>:UW slice_edge_ub<0,1,0>:UB 2:UW {align1};                                               /* f0.0 set when bit 1 of slice_edge_ub is set */
102 (f0.0) and (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB  0xE0 {align1};                            /* slice edge disable B,C,D*/
103         
104 mov  (8) vme_msg_1<1>:UD        vme_m1.0<8,8,1>:UD {align1};
105         
106 /* m2: all zero */        
107 mov  (8) vme_msg_2<1>:UD        0x0:UD {align1};
108
109 /* m3: the fetched row, with dword 0 cleared and only the top byte of dword 1 kept */
110 mov  (1) INEP_ROW.0<1>:UD       0x0:UD {align1};
111 and  (1) INEP_ROW.4<1>:UD       INEP_ROW.4<0,1,0>:UD            0xFF000000:UD {align1};
112 mov  (8) vme_msg_3<1>:UD        INEP_ROW.0<8,8,1>:UD {align1};        
113
114 /* m4: zeroed, then 16 column bytes in bytes 0..15 and INTRA_PREDICTORE_MODE in the dword at offset 16 */
115 mov  (8) vme_msg_4<1>:UD        0x0 {align1};
116 mov (16) vme_msg_4.0<1>:UB      INEP_COL0.3<32,8,4>:UB {align1};       /* gathers byte 3 of every dword across the two registers returned by the COL read */
117 mov  (1) vme_msg_4.16<1>:UD     INTRA_PREDICTORE_MODE {align1};
118 /* issue the VME message built above; the reply lands at vme_wb */
119 send (8)
120         vme_msg_ind
121         vme_wb
122         null
123         vme(
124                 BIND_IDX_VME,
125                 0,
126                 0,
127                 VME_MESSAGE_TYPE_MIXED
128         )
129         mlen vme_msg_length
130         rlen vme_inter_wb_length
131         {align1};
132 /*
133  * Oword Block Write message
134  */
135
136 /* MV pairs */        
137 mov  (8) msg_reg0.0<1>:UD       obw_m0.0<8,8,1>:UD {align1};            /* message header = obw_m0 */
138
139 #ifdef DEV_SNB        
140 mov  (16) obw_m1.0<1>:UW        vme_wb1.0<16,16,1>:UB  {align1};        /* widen 16 bytes to 16 words; same pattern for obw_m2..obw_m4 below */
141 add  (8) obw_m1.0<2>:W          obw_m1.0<16,8,2>:W -64:W {align1};      /* even words -= 64 */
142 add  (8) obw_m1.2<2>:W          obw_m1.2<16,8,2>:W -48:W {align1};      /* odd words -= 48; presumably 16*4 and 12*4 remove the reference origin in quarter-pel units -- TODO confirm */
143 mov  (16) obw_m2.0<1>:UW        vme_wb1.16<16,16,1>:UB  {align1};
144 add  (8) obw_m2.0<2>:W          obw_m2.0<16,8,2>:W -64:W {align1};
145 add  (8) obw_m2.2<2>:W          obw_m2.2<16,8,2>:W -48:W {align1}; 
146 mov  (16) obw_m3.0<1>:UW        vme_wb2.0<16,16,1>:UB  {align1};
147 add  (8) obw_m3.0<2>:W          obw_m3.0<16,8,2>:W -64:W {align1};
148 add  (8) obw_m3.2<2>:W          obw_m3.2<16,8,2>:W -48:W {align1};
149 mov  (16) obw_m4.0<1>:UW        vme_wb2.16<16,16,1>:UB  {align1};
150 add  (8) obw_m4.0<2>:W          obw_m4.0<16,8,2>:W -64:W {align1};
151 add  (8) obw_m4.2<2>:W          obw_m4.2<16,8,2>:W -48:W {align1}; 
152 #else
153 mov  (8) obw_m1.0<1>:ud         vme_wb1.0<8,8,1>:ud {align1};           /* without DEV_SNB: vme_wb1..vme_wb4 are copied unchanged */
154 mov  (8) obw_m2.0<1>:ud         vme_wb2.0<8,8,1>:ud {align1};
155 mov  (8) obw_m3.0<1>:ud         vme_wb3.0<8,8,1>:ud {align1};
156 mov  (8) obw_m4.0<1>:ud         vme_wb4.0<8,8,1>:ud {align1};                
157 #endif       
158 /* stage obw_m1..obw_m4 into msg_reg1..msg_reg4 behind the header in msg_reg0 */
159 mov  (8) msg_reg1.0<1>:UD       obw_m1.0<8,8,1>:UD   {align1};
160
161 mov  (8) msg_reg2.0<1>:UD       obw_m2.0<8,8,1>:UD   {align1};
162
163 mov  (8) msg_reg3.0<1>:UD       obw_m3.0<8,8,1>:UD   {align1};
164
165 mov  (8) msg_reg4.0<1>:UD       obw_m4.0<8,8,1>:UD   {align1};                
166
167 /* bind index 3, write 8 oword, msg type: 8(OWord Block Write) */
168 send (16)
169         msg_ind
170         obw_wb
171         null
172         data_port(
173                 OBW_CACHE_TYPE,
174                 OBW_MESSAGE_TYPE,
175                 OBW_CONTROL_4,
176                 OBW_BIND_IDX,
177                 OBW_WRITE_COMMIT_CATEGORY,
178                 OBW_HEADER_PRESENT
179         )
180         mlen 5
181         rlen obw_wb_length
182         {align1};
183
184 /* other info */        
185 add             (1)     msg_reg0.8<1>:UD        obw_m0.8<0,1,0>:UD      INTER_VME_OUTPUT_MV_IN_OWS:UD {align1} ;    /* header .8 field for the second write = obw_m0.8 + INTER_VME_OUTPUT_MV_IN_OWS */
186
187 and.z.f0.0      (1)     null<1>:ud              vme_wb0.0<0,1,0>:ud     INTRAMBFLAG_MASK:ud {align1} ;    /* f0.0 set when no INTRAMBFLAG_MASK bit is set in vme_wb0.0 */
188
189 (-f0.0)jmpi     (1)     __INTRA_INFO ;    /* f0.0 clear, so a masked bit is set: take the intra path */
190
191 __INTER_INFO:   /* reached by fall-through with f0.0 set by the INTRAMBFLAG_MASK test, so the (f0.0) predicates below pass on that path */
192 mov             (1)     tmp_uw1<1>:uw           0:uw {align1} ;
193 mov             (1)     tmp_ud1<1>:ud           0:ud {align1} ;
194 (f0.0)and       (1)     tmp_uw1<1>:uw           vme_wb0.2<0,1,0>:uw     MV32_BIT_MASK:uw {align1} ;    /* tmp_uw1 = (vme_wb0.2 & MV32_BIT_MASK) >> MV32_BIT_SHIFT */
195 (f0.0)shr       (1)     tmp_uw1<1>:uw           tmp_uw1<1>:uw           MV32_BIT_SHIFT:uw {align1} ;    /* NOTE(review): source region is <1> here but <0,1,0> on the other scalar sources; confirm both assemble the same */
196 (f0.0)mul       (1)     tmp_ud1<1>:ud           tmp_uw1<0,1,0>:uw       96:uw {align1} ;    /* tmp_ud1 = tmp_uw1 * 96 + 32 */
197 (f0.0)add       (1)     tmp_ud1<1>:ud           tmp_ud1<0,1,0>:ud       32:uw {align1} ;
198 (f0.0)shl       (1)     tmp_uw1<1>:uw           tmp_uw1<0,1,0>:uw       MFC_MV32_BIT_SHIFT:uw {align1} ;    /* tmp_uw1 = (tmp_uw1 << MFC_MV32_BIT_SHIFT) + MVSIZE_UW_BASE + CBP_DC_YUV_UW */
199 (f0.0)add       (1)     tmp_uw1<1>:uw           tmp_uw1<0,1,0>:uw       MVSIZE_UW_BASE:uw {align1} ;
200 add             (1)     tmp_uw1<1>:uw           tmp_uw1<0,1,0>:uw       CBP_DC_YUV_UW:uw {align1} ;    /* the only unpredicated step of the computation */
201 /* info record in msg_reg1: word 0 = vme_wb0.0, word 1 = tmp_uw1, dword 1 = vme_wb0.28, dword 2 = tmp_ud1 */
202 mov             (1)     msg_reg1.0<1>:uw        vme_wb0.0<0,1,0>:uw     {align1} ;
203 mov             (1)     msg_reg1.2<1>:uw        tmp_uw1<0,1,0>:uw       {align1} ;
204 mov             (1)     msg_reg1.4<1>:UD        vme_wb0.28<0,1,0>:UD    {align1};
205 mov             (1)     msg_reg1.8<1>:ud        tmp_ud1<0,1,0>:ud       {align1} ;
206
207 jmpi            (1)     __OUTPUT_INFO ;    /* skip the intra variant of the record */
208         
209 __INTRA_INFO:   /* intra variant of the record: msg_reg1 dwords 0..3 = vme_wb dwords at byte offsets 0, 16, 20, 24 */
210 mov             (1)     msg_reg1.0<1>:UD        vme_wb.0<0,1,0>:UD      {align1};    /* NOTE(review): reads vme_wb where the inter path reads vme_wb0; confirm both name the same register */
211 mov             (1)     msg_reg1.4<1>:UD        vme_wb.16<0,1,0>:UD     {align1};
212 mov             (1)     msg_reg1.8<1>:UD        vme_wb.20<0,1,0>:UD     {align1};
213 mov             (1)     msg_reg1.12<1>:UD       vme_wb.24<0,1,0>:UD     {align1};
214
215 __OUTPUT_INFO:  /* write the info record (header in msg_reg0, record in msg_reg1), then step to the next macroblock */
216 /* bind index 3, write 1 oword, msg type: 8(OWord Block Write) */
217 send (16)
218         msg_ind
219         obw_wb
220         null
221         data_port(
222                 OBW_CACHE_TYPE,
223                 OBW_MESSAGE_TYPE,
224                 OBW_CONTROL_0,
225                 OBW_BIND_IDX,
226                 OBW_WRITE_COMMIT_CATEGORY,
227                 OBW_HEADER_PRESENT
228         )
229         mlen 2
230         rlen obw_wb_length
231         {align1};
232 /* advance one macroblock to the right: column counter +1, source X +16 */
233 add             (1)     orig_x_ub<1>:ub         orig_x_ub<0,1,0>:ub             1:uw {align1} ;
234 add             (1)     vme_m0.8<1>:UW          vme_m0.8<0,1,0>:UW              16:UW {align1};    /* X += 16 */
235 #ifdef DEV_SNB        
236 add             (1)     vme_m0.0<1>:W           vme_m0.0<0,1,0>:W               16:W {align1};     /* X += 16 */
237 #endif
238
239 cmp.e.f0.0      (1)     null<1>:uw              w_in_mb_uw<0,1,0>:uw            orig_x_ub<0,1,0>:ub {align1};    /* f0.0 set when the column counter equals w_in_mb_uw */
240 /* (0, y + 1) */        
241 (f0.0)mov       (1)     orig_x_ub<1>:ub         0:uw {align1} ;
(f0.0)add       (1)     orig_y_ub<1>:ub         orig_y_ub<0,1,0>:ub             1:uw {align1} ;    /* fix: keep the row counter in step with vme_m0.10; the loop reads orig_y_ub for the B, C and D neighbor tests */
242 (f0.0)mov       (1)     vme_m0.8<1>:uw          0:uw {align1} ;
243 (f0.0)add       (1)     vme_m0.10<1>:uw         vme_m0.10<0,1,0>:uw             16:uw {align1} ;    /* source Y += 16 */
244 #ifdef DEV_SNB        
245 (f0.0)mov       (1)     vme_m0.0<1>:w           -16:W {align1};                 /* Reference = (x-16,y-12)-(x+32,y+24) */
246 (f0.0)add       (1)     vme_m0.2<1>:w           vme_m0.2<0,1,0>:w               16:w {align1};
247 #endif
248
249 add             (1)     obw_m0.8<1>:UD          obw_m0.8<0,1,0>:UD              INTER_VME_OUTPUT_IN_OWS:UW {align1} ;    /* output header .8 field moves on by one macroblock record */
250         
251 add.z.f0.1      (1)     num_macroblocks<1>:w    num_macroblocks<0,1,0>:w        -1:w {align1} ;    /* f0.1 set when the remaining count reaches zero */
252 (-f0.1)jmpi     (1)     __VME_LOOP ;    /* count not yet zero: process the next macroblock */
253         
254 __EXIT: 
255         
256 /*
257  * kill thread
258  */        
259 mov  (8) msg_reg0<1>:UD         r0<8,8,1>:UD {align1};    /* message payload is a copy of r0 */
260 send (16) msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};    /* sent with EOT set and no reply expected (rlen 0) */