633e4b92cea6c768a92c16dece7afcd62af74460
[platform/upstream/libva.git] / i965_drv_video / i965_avc_hw_scoreboard.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <string.h>
31 #include <assert.h>
32
33 #include "va_backend.h"
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_avc_hw_scoreboard.h"
41 #include "i965_media_h264.h"
42 #include "i965_media.h"
43
44 extern struct i965_kernel *h264_avc_kernels;
45
46 /* On Ironlake */
47 #include "shaders/h264/mc/export.inc.gen5"
48
/*
 * Indices into avc_hw_scoreboard_kernel_offset[]: one entry for the regular
 * scoreboard kernel and one for the variant selected when the picture has
 * mbaff_frame_flag set.
 */
enum {
    AVC_HW_SCOREBOARD       = 0,
    AVC_HW_SCOREBOARD_MBAFF = 1
};
53
54 static unsigned long avc_hw_scoreboard_kernel_offset[] = {
55     SETHWSCOREBOARD_IP_GEN5 * INST_UNIT_GEN5,
56     SETHWSCOREBOARD_MBAFF_IP_GEN5 * INST_UNIT_GEN5
57 };
58
/*
 * Eight dwords that i965_avc_hw_scoreboard_upload_constants() copies
 * verbatim into the constant buffer.  Their meaning is defined by the
 * scoreboard kernels and is not visible in this file.
 *
 * The table is only ever read (it is the source of a memcpy), so it is
 * declared const.
 */
static const unsigned int avc_hw_scoreboard_constants[] = {
    0x08040201,
    0x00000010,
    0x08000210,
    0x00000000,
    0x08040201,
    0x08040210,
    0x01000010,
    0x08040200
};
69
70 static void
71 i965_avc_hw_scoreboard_surface_state(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
72 {
73     struct i965_surface_state *ss;
74     dri_bo *bo;
75
76     bo = avc_hw_scoreboard_context->surface.ss_bo;
77     dri_bo_map(bo, 1);
78     assert(bo->virtual);
79     ss = bo->virtual;
80     memset(ss, 0, sizeof(*ss));
81     ss->ss0.surface_type = I965_SURFACE_BUFFER;
82     ss->ss1.base_addr = avc_hw_scoreboard_context->surface.s_bo->offset;
83     ss->ss2.width = ((avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1) & 0x7f);
84     ss->ss2.height = (((avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1) >> 7) & 0x1fff);
85     ss->ss3.depth = (((avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1) >> 20) & 0x7f);
86     dri_bo_emit_reloc(bo,
87                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
88                       0,
89                       offsetof(struct i965_surface_state, ss1),
90                       avc_hw_scoreboard_context->surface.s_bo);
91     dri_bo_unmap(bo);
92 }
93
94 static void
95 i965_avc_hw_scoreboard_interface_descriptor_table(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
96 {
97     struct i965_interface_descriptor *desc;
98     dri_bo *bo;
99
100     bo = avc_hw_scoreboard_context->idrt.bo;
101     dri_bo_map(bo, 1);
102     assert(bo->virtual);
103     desc = bo->virtual;
104     memset(desc, 0, sizeof(*desc));
105     desc->desc0.grf_reg_blocks = 7;
106     desc->desc0.kernel_start_pointer = (avc_hw_scoreboard_context->hw_kernel.bo->offset + 
107                                         avc_hw_scoreboard_context->hw_kernel.offset) >> 6; /* reloc */
108     desc->desc1.const_urb_entry_read_offset = 0;
109     desc->desc1.const_urb_entry_read_len = 1;
110     desc->desc3.binding_table_entry_count = 0;
111     desc->desc3.binding_table_pointer = 
112         avc_hw_scoreboard_context->binding_table.bo->offset >> 5; /*reloc */
113
114     dri_bo_emit_reloc(bo,
115                       I915_GEM_DOMAIN_INSTRUCTION, 0,
116                       desc->desc0.grf_reg_blocks + avc_hw_scoreboard_context->hw_kernel.offset,
117                       offsetof(struct i965_interface_descriptor, desc0),
118                       avc_hw_scoreboard_context->hw_kernel.bo);
119
120     dri_bo_emit_reloc(bo,
121                       I915_GEM_DOMAIN_INSTRUCTION, 0,
122                       desc->desc3.binding_table_entry_count,
123                       offsetof(struct i965_interface_descriptor, desc3),
124                       avc_hw_scoreboard_context->binding_table.bo);
125
126     dri_bo_unmap(bo);
127 }
128
129 static void
130 i965_avc_hw_scoreboard_binding_table(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
131 {
132     unsigned int *binding_table;
133     dri_bo *bo = avc_hw_scoreboard_context->binding_table.bo;
134
135     dri_bo_map(bo, 1);
136     assert(bo->virtual);
137     binding_table = bo->virtual;
138     memset(binding_table, 0, bo->size);
139     binding_table[0] = avc_hw_scoreboard_context->surface.ss_bo->offset;
140     dri_bo_emit_reloc(bo,
141                       I915_GEM_DOMAIN_INSTRUCTION, 0,
142                       0,
143                       0,
144                       avc_hw_scoreboard_context->surface.ss_bo);
145     dri_bo_unmap(bo);
146 }
147
148 static void
149 i965_avc_hw_scoreboard_vfe_state(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
150 {
151     struct i965_vfe_state *vfe_state;
152     dri_bo *bo;
153
154     bo = avc_hw_scoreboard_context->vfe_state.bo;
155     dri_bo_map(bo, 1);
156     assert(bo->virtual);
157     vfe_state = bo->virtual;
158     memset(vfe_state, 0, sizeof(*vfe_state));
159     vfe_state->vfe1.max_threads = avc_hw_scoreboard_context->urb.num_vfe_entries - 1;
160     vfe_state->vfe1.urb_entry_alloc_size = avc_hw_scoreboard_context->urb.size_vfe_entry - 1;
161     vfe_state->vfe1.num_urb_entries = avc_hw_scoreboard_context->urb.num_vfe_entries;
162     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
163     vfe_state->vfe1.children_present = 0;
164     vfe_state->vfe2.interface_descriptor_base = 
165         avc_hw_scoreboard_context->idrt.bo->offset >> 4; /* reloc */
166     dri_bo_emit_reloc(bo,
167                       I915_GEM_DOMAIN_INSTRUCTION, 0,
168                       0,
169                       offsetof(struct i965_vfe_state, vfe2),
170                       avc_hw_scoreboard_context->idrt.bo);
171     dri_bo_unmap(bo);
172 }
173
174 static void
175 i965_avc_hw_scoreboard_upload_constants(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
176 {
177     unsigned char *constant_buffer;
178
179     if (avc_hw_scoreboard_context->curbe.upload)
180         return;
181
182     dri_bo_map(avc_hw_scoreboard_context->curbe.bo, 1);
183     assert(avc_hw_scoreboard_context->curbe.bo->virtual);
184     constant_buffer = avc_hw_scoreboard_context->curbe.bo->virtual;
185     memcpy(constant_buffer, avc_hw_scoreboard_constants, sizeof(avc_hw_scoreboard_constants));
186     dri_bo_unmap(avc_hw_scoreboard_context->curbe.bo);
187     avc_hw_scoreboard_context->curbe.upload = 1;
188 }
189
/*
 * Fill in every state buffer used by the scoreboard pass: surface state,
 * binding table, interface descriptor table, VFE state and, on first use,
 * the constant buffer.
 */
static void
i965_avc_hw_scoreboard_states_setup(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
{
    /* surface description and the table that refers to it */
    i965_avc_hw_scoreboard_surface_state(avc_hw_scoreboard_context);
    i965_avc_hw_scoreboard_binding_table(avc_hw_scoreboard_context);

    /* kernel descriptor and the VFE state that refers to it */
    i965_avc_hw_scoreboard_interface_descriptor_table(avc_hw_scoreboard_context);
    i965_avc_hw_scoreboard_vfe_state(avc_hw_scoreboard_context);

    /* kernel constants */
    i965_avc_hw_scoreboard_upload_constants(avc_hw_scoreboard_context);
}
199
200 static void
201 i965_avc_hw_scoreboard_pipeline_select(VADriverContextP ctx)
202 {
203     BEGIN_BATCH(ctx, 1);
204     OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
205     ADVANCE_BATCH(ctx);
206 }
207
208 static void
209 i965_avc_hw_scoreboard_urb_layout(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
210 {
211     struct i965_driver_data *i965 = i965_driver_data(ctx);
212     unsigned int vfe_fence, cs_fence;
213
214     vfe_fence = avc_hw_scoreboard_context->urb.cs_start;
215     cs_fence = URB_SIZE((&i965->intel));
216
217     BEGIN_BATCH(ctx, 3);
218     OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
219     OUT_BATCH(ctx, 0);
220     OUT_BATCH(ctx, 
221               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
222               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
223     ADVANCE_BATCH(ctx);
224 }
225
226 static void
227 i965_avc_hw_scoreboard_state_base_address(VADriverContextP ctx)
228 {
229     BEGIN_BATCH(ctx, 8);
230     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
231     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
232     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
233     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
234     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
235     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
236     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
237     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
238     ADVANCE_BATCH(ctx);
239 }
240
241 static void
242 i965_avc_hw_scoreboard_state_pointers(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
243 {
244     BEGIN_BATCH(ctx, 3);
245     OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1);
246     OUT_BATCH(ctx, 0);
247     OUT_RELOC(ctx, avc_hw_scoreboard_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
248     ADVANCE_BATCH(ctx);
249 }
250
251 static void 
252 i965_avc_hw_scoreboard_cs_urb_layout(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
253 {
254     BEGIN_BATCH(ctx, 2);
255     OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
256     OUT_BATCH(ctx,
257               ((avc_hw_scoreboard_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
258               (avc_hw_scoreboard_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
259     ADVANCE_BATCH(ctx);
260 }
261
262 static void
263 i965_avc_hw_scoreboard_constant_buffer(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
264 {
265     BEGIN_BATCH(ctx, 2);
266     OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
267     OUT_RELOC(ctx, avc_hw_scoreboard_context->curbe.bo,
268               I915_GEM_DOMAIN_INSTRUCTION, 0,
269               avc_hw_scoreboard_context->urb.size_cs_entry - 1);
270     ADVANCE_BATCH(ctx);    
271 }
272
273 static void
274 i965_avc_hw_scoreboard_objects(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
275 {
276     int number_mb_cmds = 512;
277     int starting_mb_number = avc_hw_scoreboard_context->inline_data.starting_mb_number;
278     int i;
279
280     for (i = 0; i < avc_hw_scoreboard_context->inline_data.num_mb_cmds / 512; i++) {
281         BEGIN_BATCH(ctx, 6);
282         OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 4);
283         OUT_BATCH(ctx, 0); /* interface descriptor offset: 0 */
284         OUT_BATCH(ctx, 0); /* no indirect data */
285         OUT_BATCH(ctx, 0);
286         OUT_BATCH(ctx, ((number_mb_cmds << 16) |
287                         (starting_mb_number << 0)));
288         OUT_BATCH(ctx, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs);
289         ADVANCE_BATCH(ctx);
290
291         starting_mb_number += 512;
292     }
293
294     number_mb_cmds = avc_hw_scoreboard_context->inline_data.num_mb_cmds % 512;
295
296     if (number_mb_cmds) {
297         BEGIN_BATCH(ctx, 6);
298         OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 4);
299         OUT_BATCH(ctx, 0); /* interface descriptor offset: 0 */
300         OUT_BATCH(ctx, 0); /* no indirect data */
301         OUT_BATCH(ctx, 0);
302         OUT_BATCH(ctx, ((number_mb_cmds << 16) |
303                         (starting_mb_number << 0)));
304         OUT_BATCH(ctx, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs);
305         ADVANCE_BATCH(ctx);
306     }
307 }
308
309 static void
310 i965_avc_hw_scoreboard_pipeline_setup(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
311 {
312     intel_batchbuffer_start_atomic(ctx, 0x1000);
313     intel_batchbuffer_emit_mi_flush(ctx);
314     i965_avc_hw_scoreboard_pipeline_select(ctx);
315     i965_avc_hw_scoreboard_state_base_address(ctx);
316     i965_avc_hw_scoreboard_state_pointers(ctx, avc_hw_scoreboard_context);
317     i965_avc_hw_scoreboard_urb_layout(ctx, avc_hw_scoreboard_context);
318     i965_avc_hw_scoreboard_cs_urb_layout(ctx, avc_hw_scoreboard_context);
319     i965_avc_hw_scoreboard_constant_buffer(ctx, avc_hw_scoreboard_context);
320     i965_avc_hw_scoreboard_objects(ctx, avc_hw_scoreboard_context);
321     intel_batchbuffer_end_atomic(ctx);
322 }
323
324 void
325 i965_avc_hw_scoreboard(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context)
326 {
327     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
328
329     if (i965_h264_context->use_avc_hw_scoreboard) {
330         struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
331
332         avc_hw_scoreboard_context->inline_data.num_mb_cmds = i965_h264_context->avc_it_command_mb_info.mbs;
333         avc_hw_scoreboard_context->inline_data.starting_mb_number = i965_h264_context->avc_it_command_mb_info.mbs;
334         avc_hw_scoreboard_context->inline_data.pic_width_in_mbs = i965_h264_context->picture.width_in_mbs;
335         avc_hw_scoreboard_context->surface.total_mbs = i965_h264_context->avc_it_command_mb_info.mbs * 2;
336         
337         dri_bo_unreference(avc_hw_scoreboard_context->hw_kernel.bo);
338         avc_hw_scoreboard_context->hw_kernel.bo = h264_avc_kernels[H264_AVC_COMBINED].bo;
339         assert(avc_hw_scoreboard_context->hw_kernel.bo != NULL);
340         dri_bo_reference(avc_hw_scoreboard_context->hw_kernel.bo);
341
342         if (i965_h264_context->picture.mbaff_frame_flag)
343             avc_hw_scoreboard_context->hw_kernel.offset = avc_hw_scoreboard_kernel_offset[AVC_HW_SCOREBOARD_MBAFF];
344         else
345             avc_hw_scoreboard_context->hw_kernel.offset = avc_hw_scoreboard_kernel_offset[AVC_HW_SCOREBOARD];
346
347         i965_avc_hw_scoreboard_states_setup(avc_hw_scoreboard_context);
348         i965_avc_hw_scoreboard_pipeline_setup(ctx, avc_hw_scoreboard_context);
349     }
350 }
351
352 void
353 i965_avc_hw_scoreboard_decode_init(VADriverContextP ctx, void *h264_context)
354 {
355     struct i965_driver_data *i965 = i965_driver_data(ctx);
356     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
357
358     if (i965_h264_context->use_avc_hw_scoreboard) {
359         struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
360         dri_bo *bo;
361
362         if (avc_hw_scoreboard_context->curbe.bo == NULL) {
363             bo = dri_bo_alloc(i965->intel.bufmgr,
364                               "constant buffer",
365                               4096, 64);
366             assert(bo);
367             avc_hw_scoreboard_context->curbe.bo = bo;
368             avc_hw_scoreboard_context->curbe.upload = 0;
369         }
370
371         dri_bo_unreference(avc_hw_scoreboard_context->surface.s_bo);
372         avc_hw_scoreboard_context->surface.s_bo = i965_h264_context->avc_it_command_mb_info.bo;
373         assert(avc_hw_scoreboard_context->surface.s_bo != NULL);
374         dri_bo_reference(avc_hw_scoreboard_context->surface.s_bo);
375
376         dri_bo_unreference(avc_hw_scoreboard_context->surface.ss_bo);
377         bo = dri_bo_alloc(i965->intel.bufmgr, 
378                           "surface state", 
379                           sizeof(struct i965_surface_state), 32);
380         assert(bo);
381         avc_hw_scoreboard_context->surface.ss_bo = bo;
382
383         dri_bo_unreference(avc_hw_scoreboard_context->binding_table.bo);
384         bo = dri_bo_alloc(i965->intel.bufmgr, 
385                           "binding table",
386                           MAX_MEDIA_SURFACES * sizeof(unsigned int), 32);
387         assert(bo);
388         avc_hw_scoreboard_context->binding_table.bo = bo;
389
390         dri_bo_unreference(avc_hw_scoreboard_context->idrt.bo);
391         bo = dri_bo_alloc(i965->intel.bufmgr, 
392                           "interface discriptor", 
393                           MAX_INTERFACE_DESC * sizeof(struct i965_interface_descriptor), 16);
394         assert(bo);
395         avc_hw_scoreboard_context->idrt.bo = bo;
396
397         dri_bo_unreference(avc_hw_scoreboard_context->vfe_state.bo);
398         bo = dri_bo_alloc(i965->intel.bufmgr, 
399                           "vfe state", 
400                           sizeof(struct i965_vfe_state), 32);
401         assert(bo);
402         avc_hw_scoreboard_context->vfe_state.bo = bo;
403
404         avc_hw_scoreboard_context->urb.num_vfe_entries = 32;
405         avc_hw_scoreboard_context->urb.size_vfe_entry = 2;
406         avc_hw_scoreboard_context->urb.num_cs_entries = 1;
407         avc_hw_scoreboard_context->urb.size_cs_entry = 1;
408         avc_hw_scoreboard_context->urb.vfe_start = 0;
409         avc_hw_scoreboard_context->urb.cs_start = avc_hw_scoreboard_context->urb.vfe_start + 
410             avc_hw_scoreboard_context->urb.num_vfe_entries * avc_hw_scoreboard_context->urb.size_vfe_entry;
411         assert(avc_hw_scoreboard_context->urb.cs_start + 
412                avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
413     }
414 }
415
416 Bool 
417 i965_avc_hw_scoreboard_ternimate(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
418 {
419     dri_bo_unreference(avc_hw_scoreboard_context->curbe.bo);
420     avc_hw_scoreboard_context->curbe.bo = NULL;
421
422     dri_bo_unreference(avc_hw_scoreboard_context->surface.ss_bo);
423     avc_hw_scoreboard_context->surface.ss_bo = NULL;
424
425     dri_bo_unreference(avc_hw_scoreboard_context->surface.s_bo);
426     avc_hw_scoreboard_context->surface.s_bo = NULL;
427
428     dri_bo_unreference(avc_hw_scoreboard_context->binding_table.bo);
429     avc_hw_scoreboard_context->binding_table.bo = NULL;
430
431     dri_bo_unreference(avc_hw_scoreboard_context->idrt.bo);
432     avc_hw_scoreboard_context->idrt.bo = NULL;
433
434     dri_bo_unreference(avc_hw_scoreboard_context->vfe_state.bo);
435     avc_hw_scoreboard_context->vfe_state.bo = NULL;
436
437     dri_bo_unreference(avc_hw_scoreboard_context->hw_kernel.bo);
438     avc_hw_scoreboard_context->hw_kernel.bo = NULL;
439
440     return True;
441 }