/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_gpe_utils.h"

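/*
 * Select the media pipeline.  Every GPE batch starts with PIPELINE_SELECT
 * so that the MEDIA_* state commands that follow are routed to the media
 * fixed-function pipeline rather than the 3D pipeline.
 */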
static void
i965_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}

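/*
 * Program STATE_BASE_ADDRESS (Gen6/Gen7 layout, 10 dwords).  Only the
 * surface state base is relocated to a real buffer object; all binding
 * table and surface state offsets emitted later are relative to the start
 * of surface_state_binding_table.bo.
 */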
static void
gen6_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 10);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Base Address */
    OUT_RELOC(batch,
              gpe_context->surface_state_binding_table.bo,
              I915_GEM_DOMAIN_INSTRUCTION,
              0,
              BASE_ADDRESS_MODIFY);                     /* Surface State Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Access Upper Bound */

    ADVANCE_BATCH(batch);
}

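/*
 * Program MEDIA_VFE_STATE: the maximum thread count, URB configuration
 * and CURBE allocation size used by the media pipeline.
 */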
static void
gen6_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 8);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0);                                        /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */
    /* vfe_desc5/6/7 decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

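/*
 * MEDIA_CURBE_LOAD: point the hardware at the constant (CURBE) buffer
 * that is pushed to the media kernels.
 */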
static void
gen6_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe.length);
    OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

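/*
 * MEDIA_INTERFACE_DESCRIPTOR_LOAD: upload the interface descriptor table
 * that maps interface indices to kernel start pointers.
 */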
static void
gen6_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
    OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

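/*
 * Copy each kernel binary into its own buffer object (Gen6/Gen7 path), so
 * that the interface descriptors can reference the kernels directly.
 */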
void
i965_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    for (i = 0; i < num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size,
                                  0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }
}

void
i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    int i;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = NULL;

    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = NULL;

    for (i = 0; i < gpe_context->num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }
}

void
i965_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    dri_bo_unreference(gpe_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "interface descriptor table",
                      gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
                      4096);
    assert(bo);
    gpe_context->idrt.bo = bo;

    dri_bo_unreference(gpe_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "curbe buffer",
                      gpe_context->curbe.length,
                      4096);
    assert(bo);
    gpe_context->curbe.bo = bo;
}

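/*
 * Emit the full media pipeline preamble: flush, pipeline select, base
 * addresses, VFE state, CURBE and interface descriptors.  The caller then
 * only needs to emit MEDIA_OBJECT commands to spawn threads.
 */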
void
gen6_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen6_gpe_state_base_address(ctx, gpe_context, batch);
    gen6_gpe_vfe_state(ctx, gpe_context, batch);
    gen6_gpe_curbe_load(ctx, gpe_context, batch);
    gen6_gpe_idrt(ctx, gpe_context, batch);
}

static void
i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

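/*
 * Fill a media "surface state 2" for an NV12 surface: width, height and
 * pitch are programmed minus one, and the interleaved CbCr plane is
 * located through the x/y_cb_offset fields of the object surface.
 */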
static void
i965_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct i965_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    i965_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
i965_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct i965_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct i965_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    i965_gpe_set_surface_tiling(ss, tiling);
}

void
i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

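/*
 * Describe a raw buffer as a BUFFER surface.  The entry count minus one is
 * split across the width (7 bits), height (13 bits) and depth (7 bits)
 * fields.  For example, num_entries = 1000 is encoded as
 * width = 999 & 0x7f = 103, height = (999 >> 7) & 0x1fff = 7, depth = 0.
 */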
static void
i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct i965_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.render_cache_read_mode = 1;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
i965_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen7_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen7_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen7_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen7_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen7_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

static void
gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                         struct object_surface *obj_surface,
                                         struct gen7_surface_state *ss)
{
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

void
gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                     struct i965_gpe_context *gpe_context,
                                     struct object_surface *obj_surface,
                                     unsigned long binding_table_offset,
                                     unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen7_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen8_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss6: surface base address */
    ss->ss6.base_addr = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen8_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen8_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen8_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen8_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8: surface base address */
    ss->ss8.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

static void
gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                        struct object_surface *obj_surface,
                                        struct gen8_surface_state *ss)
{
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8: surface base address */
    ss->ss8.base_addr = obj_surface->bo->offset + cbcr_offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

void
gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct object_surface *obj_surface,
                                    unsigned long binding_table_offset,
                                    unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen8_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss8: buffer base address */
    ss->ss8.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

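/*
 * Program STATE_BASE_ADDRESS for Gen8 (BDW): addresses are 64 bits wide,
 * so the command grows to 16 dwords, and dynamic, indirect and instruction
 * state now live in dedicated buffer objects.
 */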
static void
gen8_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 16);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* General State Base Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4. Surface state base address */
    OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW6. Dynamic state base address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC(batch, gpe_context->dynamic_state.bo,
                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                  0, BASE_ADDRESS_MODIFY);
    else
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    OUT_BATCH(batch, 0);

    /* DW8. Indirect object base address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC(batch, gpe_context->indirect_state.bo,
                  I915_GEM_DOMAIN_SAMPLER,
                  0, BASE_ADDRESS_MODIFY);
    else
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    OUT_BATCH(batch, 0);

    /* DW10. Instruction base address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC(batch, gpe_context->instruction_state.bo,
                  I915_GEM_DOMAIN_INSTRUCTION,
                  0, BASE_ADDRESS_MODIFY);
    else
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    OUT_BATCH(batch, 0);

    /* DW12. Size limits */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */

    /*
      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                // LLC Coherent Base Address
      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);       // LLC Coherent Upper Bound
    */

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 9);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
    /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */

    /* vfe_desc5/6/7 decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

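/*
 * On Gen8 the CURBE lives inside the shared dynamic state buffer, so
 * MEDIA_CURBE_LOAD takes an offset relative to the dynamic state base
 * address instead of a relocation.
 */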
static void
gen8_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe_size);
    OUT_BATCH(batch, gpe_context->curbe_offset);

    ADVANCE_BATCH(batch);
}

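/*
 * Likewise, the Gen8 interface descriptor table is referenced by its
 * offset within the dynamic state buffer.
 */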
static void
gen8_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt_size);
    OUT_BATCH(batch, gpe_context->idrt_offset);

    ADVANCE_BATCH(batch);
}

void
gen8_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen8_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}

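/*
 * Allocate the per-context buffers for Gen8: one BO for surface state plus
 * binding table, and one shared dynamic state BO laid out as
 * [CURBE][interface descriptors][sampler state], each section starting on
 * a 64-byte boundary.
 */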
void
gen8_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    int bo_size;
    unsigned int end_offset;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    /* the extra 192 bytes appear to cover the worst-case 64-byte alignment
     * of the three sections below */
    bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
    dri_bo_unreference(gpe_context->dynamic_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic state",
                      bo_size,
                      4096);
    assert(bo);
    gpe_context->dynamic_state.bo = bo;
    gpe_context->dynamic_state.bo_size = bo_size;

    end_offset = 0;
    gpe_context->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    gpe_context->curbe_offset = ALIGN(end_offset, 64);
    end_offset = gpe_context->curbe_offset + gpe_context->curbe_size;

    /* Interface descriptor offset */
    gpe_context->idrt_offset = ALIGN(end_offset, 64);
    end_offset = gpe_context->idrt_offset + gpe_context->idrt_size;

    /* Sampler state offset */
    gpe_context->sampler_offset = ALIGN(end_offset, 64);
    end_offset = gpe_context->sampler_offset + gpe_context->sampler_size;

    /* update the end offset of dynamic_state */
    gpe_context->dynamic_state.end_offset = end_offset;
}

void
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->instruction_state.bo);
    gpe_context->instruction_state.bo = NULL;

    dri_bo_unreference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo = NULL;

    dri_bo_unreference(gpe_context->indirect_state.bo);
    gpe_context->indirect_state.bo = NULL;
}

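/*
 * Gen8 packs all kernel binaries into a single instruction state BO, each
 * kernel starting on a 64-byte boundary; kernel->kernel_offset is used by
 * callers to build interface descriptors relative to the instruction base
 * address.
 */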
void
gen8_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i, kernel_size;
    unsigned int kernel_offset, end_offset;
    unsigned char *kernel_ptr;
    struct i965_kernel *kernel;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    /* reserve 64 bytes per kernel for the 64-byte alignment of each start */
    kernel_size = num_kernels * 64;
    for (i = 0; i < num_kernels; i++) {
        kernel = &gpe_context->kernels[i];

        kernel_size += kernel->size;
    }

    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                     "kernel shader",
                                                     kernel_size,
                                                     0x1000);
    if (gpe_context->instruction_state.bo == NULL) {
        WARN_ONCE("failed to allocate the buffer space for kernel shaders\n");
        return;
    }

    gpe_context->instruction_state.bo_size = kernel_size;
    gpe_context->instruction_state.end_offset = 0;
    end_offset = 0;

    dri_bo_map(gpe_context->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
    for (i = 0; i < num_kernels; i++) {
        kernel_offset = ALIGN(end_offset, 64);
        kernel = &gpe_context->kernels[i];
        kernel->kernel_offset = kernel_offset;

        memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

        /* account for the aligned start, otherwise a later kernel could be
         * placed at an offset that overlaps this one */
        end_offset = kernel_offset + kernel->size;
    }

    gpe_context->instruction_state.end_offset = end_offset;

    dri_bo_unmap(gpe_context->instruction_state.bo);
}