i965_drv_video: store kernel info in the corresponding context
[platform/upstream/libva.git] / i965_drv_video / i965_avc_hw_scoreboard.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <string.h>
31 #include <assert.h>
32
33 #include "va_backend.h"
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_avc_hw_scoreboard.h"
41 #include "i965_media_h264.h"
42 #include "i965_media.h"
43
44 /* On Ironlake */
45 #include "shaders/h264/mc/export.inc.gen5"
46
/* Indices into avc_hw_scoreboard_kernel_offset[]. */
enum {
    AVC_HW_SCOREBOARD       = 0,    /* selected when mbaff_frame_flag is clear */
    AVC_HW_SCOREBOARD_MBAFF = 1     /* selected when mbaff_frame_flag is set */
};
51
52 static unsigned long avc_hw_scoreboard_kernel_offset[] = {
53     SETHWSCOREBOARD_IP_GEN5 * INST_UNIT_GEN5,
54     SETHWSCOREBOARD_MBAFF_IP_GEN5 * INST_UNIT_GEN5
55 };
56
/*
 * Constant data copied verbatim into the CURBE buffer object by
 * i965_avc_hw_scoreboard_upload_constants().  The table is only ever read,
 * so it is const.
 */
static const unsigned int avc_hw_scoreboard_constants[] = {
    0x08040201,
    0x00000010,
    0x08000210,
    0x00000000,
    0x08040201,
    0x08040210,
    0x01000010,
    0x08040200
};
67
68 static void
69 i965_avc_hw_scoreboard_surface_state(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
70 {
71     struct i965_surface_state *ss;
72     dri_bo *bo;
73
74     bo = avc_hw_scoreboard_context->surface.ss_bo;
75     dri_bo_map(bo, 1);
76     assert(bo->virtual);
77     ss = bo->virtual;
78     memset(ss, 0, sizeof(*ss));
79     ss->ss0.surface_type = I965_SURFACE_BUFFER;
80     ss->ss1.base_addr = avc_hw_scoreboard_context->surface.s_bo->offset;
81     ss->ss2.width = ((avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1) & 0x7f);
82     ss->ss2.height = (((avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1) >> 7) & 0x1fff);
83     ss->ss3.depth = (((avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1) >> 20) & 0x7f);
84     dri_bo_emit_reloc(bo,
85                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
86                       0,
87                       offsetof(struct i965_surface_state, ss1),
88                       avc_hw_scoreboard_context->surface.s_bo);
89     dri_bo_unmap(bo);
90 }
91
92 static void
93 i965_avc_hw_scoreboard_interface_descriptor_table(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
94 {
95     struct i965_interface_descriptor *desc;
96     dri_bo *bo;
97
98     bo = avc_hw_scoreboard_context->idrt.bo;
99     dri_bo_map(bo, 1);
100     assert(bo->virtual);
101     desc = bo->virtual;
102     memset(desc, 0, sizeof(*desc));
103     desc->desc0.grf_reg_blocks = 7;
104     desc->desc0.kernel_start_pointer = (avc_hw_scoreboard_context->hw_kernel.bo->offset + 
105                                         avc_hw_scoreboard_context->hw_kernel.offset) >> 6; /* reloc */
106     desc->desc1.const_urb_entry_read_offset = 0;
107     desc->desc1.const_urb_entry_read_len = 1;
108     desc->desc3.binding_table_entry_count = 0;
109     desc->desc3.binding_table_pointer = 
110         avc_hw_scoreboard_context->binding_table.bo->offset >> 5; /*reloc */
111
112     dri_bo_emit_reloc(bo,
113                       I915_GEM_DOMAIN_INSTRUCTION, 0,
114                       desc->desc0.grf_reg_blocks + avc_hw_scoreboard_context->hw_kernel.offset,
115                       offsetof(struct i965_interface_descriptor, desc0),
116                       avc_hw_scoreboard_context->hw_kernel.bo);
117
118     dri_bo_emit_reloc(bo,
119                       I915_GEM_DOMAIN_INSTRUCTION, 0,
120                       desc->desc3.binding_table_entry_count,
121                       offsetof(struct i965_interface_descriptor, desc3),
122                       avc_hw_scoreboard_context->binding_table.bo);
123
124     dri_bo_unmap(bo);
125 }
126
127 static void
128 i965_avc_hw_scoreboard_binding_table(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
129 {
130     unsigned int *binding_table;
131     dri_bo *bo = avc_hw_scoreboard_context->binding_table.bo;
132
133     dri_bo_map(bo, 1);
134     assert(bo->virtual);
135     binding_table = bo->virtual;
136     memset(binding_table, 0, bo->size);
137     binding_table[0] = avc_hw_scoreboard_context->surface.ss_bo->offset;
138     dri_bo_emit_reloc(bo,
139                       I915_GEM_DOMAIN_INSTRUCTION, 0,
140                       0,
141                       0,
142                       avc_hw_scoreboard_context->surface.ss_bo);
143     dri_bo_unmap(bo);
144 }
145
146 static void
147 i965_avc_hw_scoreboard_vfe_state(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
148 {
149     struct i965_vfe_state *vfe_state;
150     dri_bo *bo;
151
152     bo = avc_hw_scoreboard_context->vfe_state.bo;
153     dri_bo_map(bo, 1);
154     assert(bo->virtual);
155     vfe_state = bo->virtual;
156     memset(vfe_state, 0, sizeof(*vfe_state));
157     vfe_state->vfe1.max_threads = avc_hw_scoreboard_context->urb.num_vfe_entries - 1;
158     vfe_state->vfe1.urb_entry_alloc_size = avc_hw_scoreboard_context->urb.size_vfe_entry - 1;
159     vfe_state->vfe1.num_urb_entries = avc_hw_scoreboard_context->urb.num_vfe_entries;
160     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
161     vfe_state->vfe1.children_present = 0;
162     vfe_state->vfe2.interface_descriptor_base = 
163         avc_hw_scoreboard_context->idrt.bo->offset >> 4; /* reloc */
164     dri_bo_emit_reloc(bo,
165                       I915_GEM_DOMAIN_INSTRUCTION, 0,
166                       0,
167                       offsetof(struct i965_vfe_state, vfe2),
168                       avc_hw_scoreboard_context->idrt.bo);
169     dri_bo_unmap(bo);
170 }
171
172 static void
173 i965_avc_hw_scoreboard_upload_constants(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
174 {
175     unsigned char *constant_buffer;
176
177     if (avc_hw_scoreboard_context->curbe.upload)
178         return;
179
180     dri_bo_map(avc_hw_scoreboard_context->curbe.bo, 1);
181     assert(avc_hw_scoreboard_context->curbe.bo->virtual);
182     constant_buffer = avc_hw_scoreboard_context->curbe.bo->virtual;
183     memcpy(constant_buffer, avc_hw_scoreboard_constants, sizeof(avc_hw_scoreboard_constants));
184     dri_bo_unmap(avc_hw_scoreboard_context->curbe.bo);
185     avc_hw_scoreboard_context->curbe.upload = 1;
186 }
187
/*
 * Populate every indirect state buffer used by the scoreboard pass:
 * surface state, binding table, interface descriptor, VFE state and the
 * CURBE constants, in that order.
 */
static void
i965_avc_hw_scoreboard_states_setup(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
{
    struct i965_avc_hw_scoreboard_context *sb = avc_hw_scoreboard_context;

    i965_avc_hw_scoreboard_surface_state(sb);
    i965_avc_hw_scoreboard_binding_table(sb);
    i965_avc_hw_scoreboard_interface_descriptor_table(sb);
    i965_avc_hw_scoreboard_vfe_state(sb);
    i965_avc_hw_scoreboard_upload_constants(sb);
}
197
198 static void
199 i965_avc_hw_scoreboard_pipeline_select(VADriverContextP ctx)
200 {
201     BEGIN_BATCH(ctx, 1);
202     OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
203     ADVANCE_BATCH(ctx);
204 }
205
206 static void
207 i965_avc_hw_scoreboard_urb_layout(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
208 {
209     struct i965_driver_data *i965 = i965_driver_data(ctx);
210     unsigned int vfe_fence, cs_fence;
211
212     vfe_fence = avc_hw_scoreboard_context->urb.cs_start;
213     cs_fence = URB_SIZE((&i965->intel));
214
215     BEGIN_BATCH(ctx, 3);
216     OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
217     OUT_BATCH(ctx, 0);
218     OUT_BATCH(ctx, 
219               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
220               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
221     ADVANCE_BATCH(ctx);
222 }
223
224 static void
225 i965_avc_hw_scoreboard_state_base_address(VADriverContextP ctx)
226 {
227     BEGIN_BATCH(ctx, 8);
228     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
229     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
230     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
231     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
232     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
233     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
234     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
235     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
236     ADVANCE_BATCH(ctx);
237 }
238
239 static void
240 i965_avc_hw_scoreboard_state_pointers(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
241 {
242     BEGIN_BATCH(ctx, 3);
243     OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1);
244     OUT_BATCH(ctx, 0);
245     OUT_RELOC(ctx, avc_hw_scoreboard_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
246     ADVANCE_BATCH(ctx);
247 }
248
249 static void 
250 i965_avc_hw_scoreboard_cs_urb_layout(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
251 {
252     BEGIN_BATCH(ctx, 2);
253     OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
254     OUT_BATCH(ctx,
255               ((avc_hw_scoreboard_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
256               (avc_hw_scoreboard_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
257     ADVANCE_BATCH(ctx);
258 }
259
260 static void
261 i965_avc_hw_scoreboard_constant_buffer(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
262 {
263     BEGIN_BATCH(ctx, 2);
264     OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
265     OUT_RELOC(ctx, avc_hw_scoreboard_context->curbe.bo,
266               I915_GEM_DOMAIN_INSTRUCTION, 0,
267               avc_hw_scoreboard_context->urb.size_cs_entry - 1);
268     ADVANCE_BATCH(ctx);    
269 }
270
271 static void
272 i965_avc_hw_scoreboard_objects(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
273 {
274     int number_mb_cmds = 512;
275     int starting_mb_number = avc_hw_scoreboard_context->inline_data.starting_mb_number;
276     int i;
277
278     for (i = 0; i < avc_hw_scoreboard_context->inline_data.num_mb_cmds / 512; i++) {
279         BEGIN_BATCH(ctx, 6);
280         OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 4);
281         OUT_BATCH(ctx, 0); /* interface descriptor offset: 0 */
282         OUT_BATCH(ctx, 0); /* no indirect data */
283         OUT_BATCH(ctx, 0);
284         OUT_BATCH(ctx, ((number_mb_cmds << 16) |
285                         (starting_mb_number << 0)));
286         OUT_BATCH(ctx, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs);
287         ADVANCE_BATCH(ctx);
288
289         starting_mb_number += 512;
290     }
291
292     number_mb_cmds = avc_hw_scoreboard_context->inline_data.num_mb_cmds % 512;
293
294     if (number_mb_cmds) {
295         BEGIN_BATCH(ctx, 6);
296         OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 4);
297         OUT_BATCH(ctx, 0); /* interface descriptor offset: 0 */
298         OUT_BATCH(ctx, 0); /* no indirect data */
299         OUT_BATCH(ctx, 0);
300         OUT_BATCH(ctx, ((number_mb_cmds << 16) |
301                         (starting_mb_number << 0)));
302         OUT_BATCH(ctx, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs);
303         ADVANCE_BATCH(ctx);
304     }
305 }
306
307 static void
308 i965_avc_hw_scoreboard_pipeline_setup(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
309 {
310     intel_batchbuffer_start_atomic(ctx, 0x1000);
311     intel_batchbuffer_emit_mi_flush(ctx);
312     i965_avc_hw_scoreboard_pipeline_select(ctx);
313     i965_avc_hw_scoreboard_state_base_address(ctx);
314     i965_avc_hw_scoreboard_state_pointers(ctx, avc_hw_scoreboard_context);
315     i965_avc_hw_scoreboard_urb_layout(ctx, avc_hw_scoreboard_context);
316     i965_avc_hw_scoreboard_cs_urb_layout(ctx, avc_hw_scoreboard_context);
317     i965_avc_hw_scoreboard_constant_buffer(ctx, avc_hw_scoreboard_context);
318     i965_avc_hw_scoreboard_objects(ctx, avc_hw_scoreboard_context);
319     intel_batchbuffer_end_atomic(ctx);
320 }
321
322 void
323 i965_avc_hw_scoreboard(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context)
324 {
325     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
326
327     if (i965_h264_context->use_avc_hw_scoreboard) {
328         struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
329
330         avc_hw_scoreboard_context->inline_data.num_mb_cmds = i965_h264_context->avc_it_command_mb_info.mbs;
331         avc_hw_scoreboard_context->inline_data.starting_mb_number = i965_h264_context->avc_it_command_mb_info.mbs;
332         avc_hw_scoreboard_context->inline_data.pic_width_in_mbs = i965_h264_context->picture.width_in_mbs;
333         avc_hw_scoreboard_context->surface.total_mbs = i965_h264_context->avc_it_command_mb_info.mbs * 2;
334         
335         dri_bo_unreference(avc_hw_scoreboard_context->hw_kernel.bo);
336         avc_hw_scoreboard_context->hw_kernel.bo = i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo;
337         assert(avc_hw_scoreboard_context->hw_kernel.bo != NULL);
338         dri_bo_reference(avc_hw_scoreboard_context->hw_kernel.bo);
339
340         if (i965_h264_context->picture.mbaff_frame_flag)
341             avc_hw_scoreboard_context->hw_kernel.offset = avc_hw_scoreboard_kernel_offset[AVC_HW_SCOREBOARD_MBAFF];
342         else
343             avc_hw_scoreboard_context->hw_kernel.offset = avc_hw_scoreboard_kernel_offset[AVC_HW_SCOREBOARD];
344
345         i965_avc_hw_scoreboard_states_setup(avc_hw_scoreboard_context);
346         i965_avc_hw_scoreboard_pipeline_setup(ctx, avc_hw_scoreboard_context);
347     }
348 }
349
350 void
351 i965_avc_hw_scoreboard_decode_init(VADriverContextP ctx, void *h264_context)
352 {
353     struct i965_driver_data *i965 = i965_driver_data(ctx);
354     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
355
356     if (i965_h264_context->use_avc_hw_scoreboard) {
357         struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
358         dri_bo *bo;
359
360         if (avc_hw_scoreboard_context->curbe.bo == NULL) {
361             bo = dri_bo_alloc(i965->intel.bufmgr,
362                               "constant buffer",
363                               4096, 64);
364             assert(bo);
365             avc_hw_scoreboard_context->curbe.bo = bo;
366             avc_hw_scoreboard_context->curbe.upload = 0;
367         }
368
369         dri_bo_unreference(avc_hw_scoreboard_context->surface.s_bo);
370         avc_hw_scoreboard_context->surface.s_bo = i965_h264_context->avc_it_command_mb_info.bo;
371         assert(avc_hw_scoreboard_context->surface.s_bo != NULL);
372         dri_bo_reference(avc_hw_scoreboard_context->surface.s_bo);
373
374         dri_bo_unreference(avc_hw_scoreboard_context->surface.ss_bo);
375         bo = dri_bo_alloc(i965->intel.bufmgr, 
376                           "surface state", 
377                           sizeof(struct i965_surface_state), 32);
378         assert(bo);
379         avc_hw_scoreboard_context->surface.ss_bo = bo;
380
381         dri_bo_unreference(avc_hw_scoreboard_context->binding_table.bo);
382         bo = dri_bo_alloc(i965->intel.bufmgr, 
383                           "binding table",
384                           MAX_MEDIA_SURFACES * sizeof(unsigned int), 32);
385         assert(bo);
386         avc_hw_scoreboard_context->binding_table.bo = bo;
387
388         dri_bo_unreference(avc_hw_scoreboard_context->idrt.bo);
389         bo = dri_bo_alloc(i965->intel.bufmgr, 
390                           "interface discriptor", 
391                           MAX_INTERFACE_DESC * sizeof(struct i965_interface_descriptor), 16);
392         assert(bo);
393         avc_hw_scoreboard_context->idrt.bo = bo;
394
395         dri_bo_unreference(avc_hw_scoreboard_context->vfe_state.bo);
396         bo = dri_bo_alloc(i965->intel.bufmgr, 
397                           "vfe state", 
398                           sizeof(struct i965_vfe_state), 32);
399         assert(bo);
400         avc_hw_scoreboard_context->vfe_state.bo = bo;
401
402         avc_hw_scoreboard_context->urb.num_vfe_entries = 32;
403         avc_hw_scoreboard_context->urb.size_vfe_entry = 2;
404         avc_hw_scoreboard_context->urb.num_cs_entries = 1;
405         avc_hw_scoreboard_context->urb.size_cs_entry = 1;
406         avc_hw_scoreboard_context->urb.vfe_start = 0;
407         avc_hw_scoreboard_context->urb.cs_start = avc_hw_scoreboard_context->urb.vfe_start + 
408             avc_hw_scoreboard_context->urb.num_vfe_entries * avc_hw_scoreboard_context->urb.size_vfe_entry;
409         assert(avc_hw_scoreboard_context->urb.cs_start + 
410                avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
411     }
412 }
413
414 Bool 
415 i965_avc_hw_scoreboard_ternimate(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
416 {
417     dri_bo_unreference(avc_hw_scoreboard_context->curbe.bo);
418     avc_hw_scoreboard_context->curbe.bo = NULL;
419
420     dri_bo_unreference(avc_hw_scoreboard_context->surface.ss_bo);
421     avc_hw_scoreboard_context->surface.ss_bo = NULL;
422
423     dri_bo_unreference(avc_hw_scoreboard_context->surface.s_bo);
424     avc_hw_scoreboard_context->surface.s_bo = NULL;
425
426     dri_bo_unreference(avc_hw_scoreboard_context->binding_table.bo);
427     avc_hw_scoreboard_context->binding_table.bo = NULL;
428
429     dri_bo_unreference(avc_hw_scoreboard_context->idrt.bo);
430     avc_hw_scoreboard_context->idrt.bo = NULL;
431
432     dri_bo_unreference(avc_hw_scoreboard_context->vfe_state.bo);
433     avc_hw_scoreboard_context->vfe_state.bo = NULL;
434
435     dri_bo_unreference(avc_hw_scoreboard_context->hw_kernel.bo);
436     avc_hw_scoreboard_context->hw_kernel.bo = NULL;
437
438     return True;
439 }