Add the VME shader for Ivy that supports MVP
[profile/ivi/vaapi-intel-driver.git] / src / shaders / vme / vme7.inc
1 /*
2  * Copyright © <2010>, Intel Corporation.
3  *
4  * This program is licensed under the terms and conditions of the
5  * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
6  * http://www.opensource.org/licenses/eclipse-1.0.php.
7  *
8  */
9 // Module name: ME_header.inc
10 //
11 // Global symbols define
12 //
13
14 /*
15  * Constants: plain numeric values that m4 substitutes textually
16  */
17 define(`VME_MESSAGE_TYPE_INTER',        `1')    /* message type codes: 1, 2 and 3 */
18 define(`VME_MESSAGE_TYPE_INTRA',        `2')
19 define(`VME_MESSAGE_TYPE_MIXED',        `3')
20         /* block sizes: both values match (height - 1) in the upper word and (width - 1) in the lower word */
21 define(`BLOCK_32X1',                    `0x0000001F')    /* width - 1 = 31, height - 1 = 0 */
22 define(`BLOCK_4X16',                    `0x000F0003')    /* width - 1 = 3, height - 1 = 15 */
23         /* luma intra disable flags: one distinct bit each (bits 0, 1, 2) */
24 define(`LUMA_INTRA_16x16_DISABLE',      `0x1')
25 define(`LUMA_INTRA_8x8_DISABLE',        `0x2')
26 define(`LUMA_INTRA_4x4_DISABLE',        `0x4')
27 /* intra prediction availability flags: AE sets two bits (0x40 and 0x20), B, C and D one bit each */
28 define(`INTRA_PRED_AVAIL_FLAG_AE',      `0x60')
29 define(`INTRA_PRED_AVAIL_FLAG_B',       `0x10')
30 define(`INTRA_PRED_AVAIL_FLAG_C',       `0x8')
31 define(`INTRA_PRED_AVAIL_FLAG_D',       `0x4')
32 /* consecutive indices 0..4; presumably binding table slots of the surfaces -- TODO confirm against the driver */
33 define(`BIND_IDX_VME',                  `0')
34 define(`BIND_IDX_VME_REF0',             `1')
35 define(`BIND_IDX_VME_REF1',             `2')
36 define(`BIND_IDX_OUTPUT',               `3')
37 define(`BIND_IDX_INEP',                 `4')
38
39 define(`SUB_PEL_MODE_INTEGER',          `0x00000000')    /* sub-pel mode: values 0, 1 and 3 held in bits 12-13 */
40 define(`SUB_PEL_MODE_HALF',             `0x00001000')
41 define(`SUB_PEL_MODE_QUARTER',          `0x00003000')
42 /* SAD adjustment flags: the inter HAAR flag is bit 21, the intra HAAR flag is bit 23, NONE is 0 for both */
43 define(`INTER_SAD_NONE',                `0x00000000')
44 define(`INTER_SAD_HAAR',                `0x00200000')
45
46 define(`INTRA_SAD_NONE',                `0x00000000')
47 define(`INTRA_SAD_HAAR',                `0x00800000')
48
49 define(`INTER_PART_MASK',               `0x00000000')    /* all-zero mask */
50 /* search control: values 0, 1, 3 and 7 held in bits 8-10 */
51 define(`SEARCH_CTRL_SINGLE',            `0x00000000')
52 define(`SEARCH_CTRL_DUAL_START',        `0x00000100')
53 define(`SEARCH_CTRL_DUAL_RECORD',       `0x00000300')
54 define(`SEARCH_CTRL_DUAL_REFERENCE',    `0x00000700')
55 /* typed unsigned word; its bytes are 0x28 (40) and 0x30 (48) -- presumably height and width, TODO confirm */
56 define(`REF_REGION_SIZE',               `0x2830:UW')
57
58 define(`BI_SUB_MB_PART_MASK',           `0x0c000000')    /* bits 26-27 */
59 define(`MAX_NUM_MV',                    `0x00000020')    /* 32 */
60 define(`FB_PRUNING_ENABLE',             `0x40000000')    /* bit 30 */
61 define(`FB_PRUNING_DISABLE',             `0x00000000')
62
63 define(`SEARCH_PATH_LEN',               `0x00003030')    /* two bytes, each 0x30 (48) */
64 define(`START_CENTER',                  `0x30000000')    /* bits 28-29 */
65
66 define(`ADAPTIVE_SEARCH_ENABLE',        `0x00000002')    /* bit 1 */
67 define(`INTRA_PREDICTORE_MODE',         `0x11111111:UD') /* typed unsigned dword, every nibble is 1; the spelling of the name is kept because other sources may reference it */
68
69 define(`INTER_VME_OUTPUT_IN_OWS',       `10')    /* sizes counted in OWs; presumably OWords of 128 bits each -- TODO confirm */
70 define(`INTER_VME_OUTPUT_MV_IN_OWS',    `8')
71
72 define(`INTRAMBFLAG_MASK',              `0x00002000')    /* bit 13 */
73 define(`MVSIZE_UW_BASE',                `0x0040')        /* 64 */
74 define(`MFC_MV32_BIT_SHIFT',            `5')
75 define(`CBP_DC_YUV_UW',                 `0x000E')        /* bits 1-3 */
76 /* the name says HARR; presumably the Haar transform is meant.  DISABLE is bit 5, ENABLE is 0 */
77 define(`DC_HARR_ENABLE',                `0x0000')
78 define(`DC_HARR_DISABLE',               `0x0020')
79 /* the mask 0x0020 equals 1 shifted left by the shift count 5 */
80 define(`MV32_BIT_MASK',                 `0x0020')
81 define(`MV32_BIT_SHIFT',                `5')
82
83 define(`OBW_CACHE_TYPE',                `10')    /* OBW: presumably OWord Block Write -- TODO confirm */
84
85
86 define(`OBW_MESSAGE_TYPE',              `8')
87
88 define(`OBW_BIND_IDX',                  `BIND_IDX_OUTPUT')    /* quoted, so the output binding index is resolved when the macro is used */
89 /* block size encodings; note the mixed naming: suffixes 0..3 are the encoding itself, suffix 8 is the OWord count */
90 define(`OBW_CONTROL_0',                 `0')    /* 1 OWord, low 128 bits */
91 define(`OBW_CONTROL_1',                 `1')    /* 1 OWord, high 128 bits */
92 define(`OBW_CONTROL_2',                 `2')    /* 2 OWords */
93 define(`OBW_CONTROL_3',                 `3')    /* 4 OWords */
94 define(`OBW_CONTROL_8',                 `4')    /* 8 OWords */
95
96 define(`FME_REPART_ENABLE',             `0x80000000')    /* bit 31 */
97 define(`FME_REPART_DISABLE',            `0x00000000')
98 define(`FME_SINGLE_PARTION',            `0x00000000')    /* PARTION: presumably partition; spelling of the name kept */
99 define(`FME_MUL_PARTION',               `0x00000008')    /* bit 3 */
100
101
102 define(`OBW_WRITE_COMMIT_CATEGORY',     `0')    /* category on Ivybridge */
103
104
105 define(`OBW_HEADER_PRESENT',            `1')
106
107 /* GRF registers, as assigned by the definitions below
108  * r0 header
109  * r1~r4 constant buffer (reserved)
110  * r5 inline data
111  * r6~r11 reserved        
112  * r12~r18 write back of VME message
113  * Oword Block Write keeps no write back (null destination, length 0)
114  */
115 /*
116  * GRF 0 -- header       
117  */        
118 define(`thread_id_ub',          `r0.20<0,1,0>:UB')  /* thread id in payload */
119
120 /*
121  * GRF 1~4 -- Constant Buffer (reserved)
122  */
123         
124 /*
125  * GRF 5 -- inline data
126  * Every field below is a sub-register selector on r5; the region and type
127  * are supplied where the macro is used.  Selectors .0 appear twice (xy and x).
128  */        
127 define(`inline_reg0',           `r5')
128 define(`w_in_mb_uw',            `inline_reg0.2')
129 define(`orig_xy_ub',            `inline_reg0.0')
130 define(`orig_x_ub',             `inline_reg0.0')    /* in macroblock */    
131 define(`orig_y_ub',             `inline_reg0.1')
132 define(`transform_8x8_ub',      `inline_reg0.4')
133 define(`input_mb_intra_ub',     `inline_reg0.5')
134 define(`num_macroblocks',       `inline_reg0.6')
135
136 /*
137  * GRF 6~11 -- reserved
138  */
139
140 /*
141  * GRF 12~18 -- write back for VME message (seven registers; the first has two names)
142  */
143 define(`vme_wb',                `r12')
144 define(`vme_wb0',               `r12')
145 define(`vme_wb1',               `r13')
146 define(`vme_wb2',               `r14')
147 define(`vme_wb3',               `r15')
148 define(`vme_wb4',               `r16')
149 define(`vme_wb5',               `r17')
150 define(`vme_wb6',               `r18')
151
152
153 /*
154  * Write back of the VME output message: none is kept, the destination is
155  * the null register and the length is 0 (this note used to name GRF 24)
156  */
156 define(`obw_wb',                `null<1>:W')
157 define(`obw_wb_length',         `0')
158
159
160 /*
161  * GRF 28~30 -- Intra Neighbor Edge Pixels
162  */
163 define(`INEP_ROW',              `r28')
164 define(`INEP_COL0',             `r29')
165 define(`INEP_COL1',             `r30')
166         
167 /*
168  * temporary registers: r32..r42, most of them paired with a second,
169  * purpose-specific name that expands to the same register
170  */
170 define(`tmp_reg0',              `r32')
171 define(`read0_header',          `tmp_reg0')
172 define(`tmp_reg1',              `r33')
173 define(`read1_header',          `tmp_reg1')
174 define(`tmp_reg2',              `r34')
175 define(`vme_m0',                `tmp_reg2')
176 define(`tmp_reg3',              `r35')                                
177 define(`vme_m1',                `tmp_reg3')
178 define(`intra_flag',            `vme_m1.28')    /* same selector as the next definition */
179 define(`intra_part_mask_ub',    `vme_m1.28')        
180 define(`mb_intra_struct_ub',    `vme_m1.29')
181 define(`tmp_reg4',              `r36')
182 define(`obw_m0',                `tmp_reg4')
183 define(`tmp_reg5',              `r37')
184 define(`obw_m1',                `tmp_reg5')
185 define(`tmp_reg6',              `r38')
186 define(`obw_m2',                `tmp_reg6')
187 define(`tmp_reg7',              `r39')
188 define(`obw_m3',                `tmp_reg7')
189 define(`tmp_reg8',              `r40')
190 define(`obw_m4',                `tmp_reg8')
191 define(`tmp_reg9',              `r41')
192 define(`tmp_x_w',               `tmp_reg9.0')
193 define(`tmp_rega',              `r42')    /* r42 is exposed through two sets of selectors: four ud names and eight uw names */
194 define(`tmp_ud0',               `tmp_rega.0')
195 define(`tmp_ud1',               `tmp_rega.4')
196 define(`tmp_ud2',               `tmp_rega.8')
197 define(`tmp_ud3',               `tmp_rega.12')
198 define(`tmp_uw0',               `tmp_rega.0')
199 define(`tmp_uw1',               `tmp_rega.2')
200 define(`tmp_uw2',               `tmp_rega.4')
201 define(`tmp_uw3',               `tmp_rega.6')
202 define(`tmp_uw4',               `tmp_rega.8')
203 define(`tmp_uw5',               `tmp_rega.10')
204 define(`tmp_uw6',               `tmp_rega.12')
205 define(`tmp_uw7',               `tmp_rega.14')
206
207 define(`vme_m2',                `r43')    /* a register of its own, not one of the temporaries above */
208 /*
209  * Message registers (this section was titled MRF registers; every macro
210  * below expands to an r register: r64..r73 and r112)
211  */        
211
212 define(`msg_ind',               `64')    /* bare number; equals the register number of the first message register, r64 */
213 define(`msg_reg0',              `r64')
214 define(`msg_reg1',              `r65')
215 define(`msg_reg2',              `r66')
216 define(`msg_reg3',              `r67')
217 define(`msg_reg4',              `r68')
218 define(`msg_reg5',              `r69')
219 define(`msg_reg6',              `r70')
220 define(`msg_reg7',              `r71')
221 define(`msg_reg8',              `r72')
222 define(`msg_reg9',              `r73')
223
224 define(`ts_msg_ind',               `112')    /* bare number; equals the register number r112 defined next */
225 define(`ts_msg_reg0',               `r112')
226 /*
227  * VME message payload
228  */
229
230 define(`vme_msg_length',        `5')
231 define(`vme_inter_wb_length',   `6')
232 define(`vme_intra_wb_length',   `1')
233
234 define(`vme_msg_ind',           `msg_ind')    /* the ten names below are second names for the message registers above */
235 define(`vme_msg_0',             `msg_reg0')
236 define(`vme_msg_1',             `msg_reg1')
237 define(`vme_msg_2',             `msg_reg2')
238
239 define(`vme_msg_3',             `msg_reg3')
240 define(`vme_msg_4',             `msg_reg4')
241
242
243 define(`vme_msg_5',             `msg_reg5')
244 define(`vme_msg_6',             `msg_reg6')
245 define(`vme_msg_7',             `msg_reg7')
246 define(`vme_msg_8',             `msg_reg8')
247 define(`vme_msg_9',             `msg_reg9')
248
249 define(`RETURN_REG',              `r127.0')    /* both names select parts of r127 */
250 define(`RET_ARG',              `r127.4')
251
252 /* At most two registers are currently used for input parameters */
253 define(`INPUT_ARG0',              `r125')
254 define(`INPUT_ARG1',              `r126')
255
256 /* Two temporary registers are used inside a function */
257 define(`TEMP_VAR0',              `r123')
258 define(`TEMP_VAR1',              `r124')
259
260
261 define(`OBR_MESSAGE_TYPE',              `0')    /* OBR: presumably OWord Block Read -- TODO confirm */
262 define(`OBR_CACHE_TYPE',                `10')
263 define(`OBR_BIND_IDX',                  `BIND_IDX_OUTPUT')    /* quoted, so the output binding index is resolved when the macro is used */
264 /* block size encodings; suffixes 0..2 are the encoding itself, suffixes 4 and 8 are the OWord count */
265 define(`OBR_CONTROL_0',                 `0')    /* 1 OWord, low 128 bits */
266 define(`OBR_CONTROL_1',                 `1')    /* 1 OWord, high 128 bits */
267 define(`OBR_CONTROL_2',                 `2')    /* 2 OWords */
268 define(`OBR_CONTROL_4',                 `3')    /* 4 OWords */
269 define(`OBR_CONTROL_8',                 `4')    /* 8 OWords */
270 define(`OBR_WRITE_COMMIT_CATEGORY',     `0')    /* category on SNB+ for Data port */
271 define(`OBR_HEADER_PRESENT',            `1')
272
273 define(`mb_hwdep',           `r5.6')    /* a selector on r5, the inline data register; .6 is also used by the macroblock count field */
274 define(`MB_AVAIL',              `1:d')    /* typed immediates: the value 1 as :d and as :w */
275 define(`MB_PRED_FLAG',          `1:w')
276 /* registers r84..r97 used for the neighbour macroblock data */
277 define(`mb_pred_mode',          `r85')
278 define(`mb_mvp_ref',            `r86')
279 define(`mba_result',            `r87')
280 define(`mbb_result',            `r88')
281 define(`mbc_result',            `r89')
282 define(`mb_ind',                `90')    /* bare number; equals the register number r90 defined next */
283 define(`mb_msg0',               `r90')
284 define(`mb_msg_tmp',            `r91')
285 define(`mb_wb',                 `r92')    /* two names for r92 */
286 define(`mb_mode_wb',            `r92')
287 define(`mb_mv0',                `r93')
288 define(`mb_mv1',                `r94')
289 define(`mb_mv2',                `r95')
290 define(`mb_mv3',                `r96')
291 define(`mb_ref',                `r97')
292 define(`mb_ref_win',            `r84')    /* sits below the rest of the group */
293
294 define(`DREF_REGION_SIZE',               `0x2020:UW')    /* typed unsigned word; both bytes are 0x20 (32) */
295 /* prediction type codes 0..3 plus the two-bit mask covering them.  The type
296  * suffix now sits inside the m4 quotes, like every other typed constant in
297  * this file; the expanded text is unchanged. */
295 define(`PRED_L0',               `0x0:uw')
296 define(`PRED_L1',               `0x1:uw')
297 define(`PRED_BI',               `0x2:uw')
298 define(`PRED_DIRECT',           `0x3:uw')
299 define(`PRED_MASK',             `0x3:uw')
300
301 /* The MAX search len per reference is 16.
302  * NOTE(review): each byte of the value below is 0x12 (18), not 16 -- confirm
303  * which of the two is intended; the value is left as it was. */
302 define(`DSEARCH_PATH_LEN',               `0x00001212')
303 define(`BI_WEIGHT',             `0x20:uw')    /* 32 */
304 define(`DSTART_CENTER',                  `0x00000000')
305 define(`INTER_MASK',                    `0x03')    /* two-bit mask covering the four mode codes below */
306 define(`INTER_16X16MODE',               `0x0')
307 define(`INTER_16X8MODE',                `0x01')
308 define(`INTER_8X16MODE',                `0x02')
309 define(`INTER_8X8MODE',                 `0x03')
310 define(`INTER_BLOCK0',                  `0x0')    /* block numbers 0..3 */
311 define(`INTER_BLOCK1',                  `0x1')
312 define(`INTER_BLOCK2',                  `0x2')
313 define(`INTER_BLOCK3',                  `0x3')
314 /* The second, identical pair of definitions of the 16X8 and 8X16 mode codes
315  * that used to follow here was dropped; the pair above already provides them. */