VPP: Fix Coverity alert on uninitialized vpp_kernels
[platform/upstream/libva-intel-driver.git] / src / gen8_render.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *    Zhao Yakui <yakui.zhao@intel.com>
28  *
29  */
30
31 /*
32  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
33  */
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40
41 #include <va/va_drmcommon.h>
42
43 #include "intel_batchbuffer.h"
44 #include "intel_driver.h"
45 #include "i965_defines.h"
46 #include "i965_drv_video.h"
47 #include "i965_structs.h"
48
49 #include "i965_render.h"
50
51 #define SF_KERNEL_NUM_GRF       16
52 #define SF_MAX_THREADS          1
53
54 #define PS_KERNEL_NUM_GRF       48
55 #define PS_MAX_THREADS          32
56
57 /* Programs for Gen8 */
/* SF kernel program: intentionally empty on Gen8.
 * NOTE(review): presumably the fixed-function SF stage is bypassed here and
 * the entry is kept only so render_kernels_gen8[] stays table-shaped —
 * confirm against the pipeline setup code.  The empty initializer list is a
 * GCC extension. */
static const uint32_t sf_kernel_static_gen8[][4] ={

};
/* Pixel-shader program for video rendering, assembled from precompiled
 * Gen8 binaries: affine source coords -> planar YUV sample -> YUV color
 * balance -> YUV-to-RGB conversion -> framebuffer write. */
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_color_balance.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};
68
/* Pixel-shader program for subpicture blending: affine source coords ->
 * ARGB sample -> framebuffer write (no color-space conversion). */
static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_argb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};
74
75
76 #define SURFACE_STATE_PADDED_SIZE       SURFACE_STATE_PADDED_SIZE_GEN8
77
78 #define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
79 #define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
80
/* Indices into render_kernels_gen8[]; must stay in sync with that table. */
enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};
86
/* Kernel descriptor table for the Gen8 render path.  Entry order must match
 * the SF_KERNEL/PS_KERNEL/PS_SUBPIC_KERNEL enum above.  Fields are:
 * name, id, program binary, binary size in bytes, and a bo pointer that is
 * NULL here — presumably filled in when the kernels are uploaded; confirm
 * against the initialization code. */
static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};
111
112 #define URB_VS_ENTRIES        8
113 #define URB_VS_ENTRY_SIZE     1
114
115 #define URB_GS_ENTRIES        0
116 #define URB_GS_ENTRY_SIZE     0
117
118 #define URB_CLIP_ENTRIES      0
119 #define URB_CLIP_ENTRY_SIZE   0
120
121 #define URB_SF_ENTRIES        1
122 #define URB_SF_ENTRY_SIZE     2
123
124 #define URB_CS_ENTRIES        4
125 #define URB_CS_ENTRY_SIZE     4
126
/*
 * YCbCr -> RGB conversion coefficient tables uploaded to the pixel shader
 * (one per supported color standard; see gen8_render_upload_constants).
 * The fourth-column values match the video-range biases -16/255 ~= -0.06275
 * and -128/255 ~= -0.50196.
 * NOTE(review): the exact row/column interpretation is defined by
 * shaders/render/exa_wm_yuv_rgb.g8b — confirm there before reusing.
 */

/* ITU-R BT.601 (SD content) */
static float yuv_to_rgb_bt601[3][4] = {
{1.164,         0,      1.596,          -0.06275,},
{1.164,         -0.392, -0.813,         -0.50196,},
{1.164,         2.017,  0,              -0.50196,},
};

/* ITU-R BT.709 (HD content) */
static float yuv_to_rgb_bt709[3][4] = {
{1.164,         0,      1.793,          -0.06275,},
{1.164,         -0.213, -0.533,         -0.50196,},
{1.164,         2.112,  0,              -0.50196,},
};

/* SMPTE 240M */
static float yuv_to_rgb_smpte_240[3][4] = {
{1.164,         0,      1.794,          -0.06275,},
{1.164,         -0.258, -0.5425,        -0.50196,},
{1.164,         2.078,  0,              -0.50196,},
};
144
145
146 static void
147 gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
148 {
149    switch (tiling) {
150    case I915_TILING_NONE:
151       ss->ss0.tiled_surface = 0;
152       ss->ss0.tile_walk = 0;
153       break;
154    case I915_TILING_X:
155       ss->ss0.tiled_surface = 1;
156       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
157       break;
158    case I915_TILING_Y:
159       ss->ss0.tiled_surface = 1;
160       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
161       break;
162    }
163 }
164
165 /* Set "Shader Channel Select" for GEN8+ */
/*
 * Program an identity shader-channel-select mapping (R->R, G->G, B->B,
 * A->A) into the surface state; required on Gen8+, where SCS defaults do
 * not come pre-programmed.  ("chanel" spelling follows the field names in
 * struct gen8_surface_state.)
 */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
174
/*
 * Fill a Gen8 SURFACE_STATE describing a 2D surface backed by @bo at @offset.
 * Zeroes the whole structure first, then packs dimensions (stored minus one,
 * per hardware convention), format, alignment and tiling.  The base address
 * written here uses bo->offset and is patched by the relocation the caller
 * emits against the ss8 field.
 *
 * @param flags  I965_PP_FLAG_TOP_FIELD / I965_PP_FLAG_BOTTOM_FIELD select
 *               field rendering: line stride is doubled and height halved.
 */
static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        /* Bottom field also sets the vertical line stride offset. */
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    /* Hardware stores width/height/pitch as (value - 1). */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set 1(align 4 mode) per B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}
219
/*
 * Bind one source (sampled) surface at binding-table slot @index:
 * writes a SURFACE_STATE into the combined surface-state/binding-table bo,
 * emits a relocation for its 64-bit base address (ss8 field), and points
 * the binding-table entry at it.  Also bumps wm.sampler_count, so each
 * bound source surface gets a sampler (see gen8_render_sampler).
 *
 * NOTE(review): the dri_bo_map() return value is not checked; failure is
 * only caught by the assert on ss_bo->virtual in debug builds.
 */
static void
gen8_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  region, offset,
                                  w, h,
                                  pitch, format, flags);
    gen8_render_set_surface_scs(ss);
    /* Patch the surface base address (ss8) to point into @region. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      region);

    /* Binding table lives after the surface states in the same bo. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
259
260 static void
261 gen8_render_src_surfaces_state(
262     VADriverContextP ctx,
263     struct object_surface *obj_surface,
264     unsigned int     flags
265 )
266 {
267     int region_pitch;
268     int rw, rh;
269     dri_bo *region;
270
271     region_pitch = obj_surface->width;
272     rw = obj_surface->orig_width;
273     rh = obj_surface->orig_height;
274     region = obj_surface->bo;
275
276     gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
277     gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
278
279     if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
280         return;
281
282     if (obj_surface->fourcc == VA_FOURCC_NV12) {
283         gen8_render_src_surface_state(ctx, 3, region,
284                                       region_pitch * obj_surface->y_cb_offset,
285                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
286                                       I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
287         gen8_render_src_surface_state(ctx, 4, region,
288                                       region_pitch * obj_surface->y_cb_offset,
289                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
290                                       I965_SURFACEFORMAT_R8G8_UNORM, flags);
291     } else {
292         gen8_render_src_surface_state(ctx, 3, region,
293                                       region_pitch * obj_surface->y_cb_offset,
294                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
295                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
296         gen8_render_src_surface_state(ctx, 4, region,
297                                       region_pitch * obj_surface->y_cb_offset,
298                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
299                                       I965_SURFACEFORMAT_R8_UNORM, flags);
300         gen8_render_src_surface_state(ctx, 5, region,
301                                       region_pitch * obj_surface->y_cr_offset,
302                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
303                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
304         gen8_render_src_surface_state(ctx, 6, region,
305                                       region_pitch * obj_surface->y_cr_offset,
306                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
307                                       I965_SURFACEFORMAT_R8_UNORM, flags);
308     }
309 }
310
311 static void
312 gen8_subpic_render_src_surfaces_state(VADriverContextP ctx,
313                                       struct object_surface *obj_surface)
314 {
315     dri_bo *subpic_region;
316     unsigned int index = obj_surface->subpic_render_idx;
317     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
318     struct object_image *obj_image = obj_subpic->obj_image;
319
320     assert(obj_surface);
321     assert(obj_surface->bo);
322     subpic_region = obj_image->bo;
323     /*subpicture surface*/
324     gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
325     gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
326 }
327
328 static void
329 gen8_render_dest_surface_state(VADriverContextP ctx, int index)
330 {
331     struct i965_driver_data *i965 = i965_driver_data(ctx);
332     struct i965_render_state *render_state = &i965->render_state;
333     struct intel_region *dest_region = render_state->draw_region;
334     void *ss;
335     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
336     int format;
337     assert(index < MAX_RENDER_SURFACES);
338
339     if (dest_region->cpp == 2) {
340         format = I965_SURFACEFORMAT_B5G6R5_UNORM;
341     } else {
342         format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
343     }
344
345     dri_bo_map(ss_bo, 1);
346     assert(ss_bo->virtual);
347     ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
348
349     gen8_render_set_surface_state(ss,
350                                   dest_region->bo, 0,
351                                   dest_region->width, dest_region->height,
352                                   dest_region->pitch, format, 0);
353     gen8_render_set_surface_scs(ss);
354     dri_bo_emit_reloc(ss_bo,
355                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
356                       0,
357                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
358                       dest_region->bo);
359
360     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
361     dri_bo_unmap(ss_bo);
362 }
363
364 static void
365 i965_fill_vertex_buffer(
366     VADriverContextP ctx,
367     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
368     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
369 )
370 {
371     struct i965_driver_data * const i965 = i965_driver_data(ctx);
372     float vb[12];
373
374     enum { X1, Y1, X2, Y2 };
375
376     static const unsigned int g_rotation_indices[][6] = {
377         [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
378         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
379         [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
380         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
381     };
382
383     const unsigned int * const rotation_indices =
384         g_rotation_indices[i965->rotation_attrib->value];
385
386     vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
387     vb[1]  = tex_coords[rotation_indices[1]];
388     vb[2]  = vid_coords[X2];
389     vb[3]  = vid_coords[Y2];
390
391     vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
392     vb[5]  = tex_coords[rotation_indices[3]];
393     vb[6]  = vid_coords[X1];
394     vb[7]  = vid_coords[Y2];
395
396     vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
397     vb[9]  = tex_coords[rotation_indices[5]];
398     vb[10] = vid_coords[X1];
399     vb[11] = vid_coords[Y1];
400
401     dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
402 }
403
/*
 * Compute texture and screen coordinates for the active subpicture and
 * upload them via i965_fill_vertex_buffer().  The subpicture destination
 * rectangle is either taken as-is (screen coordinates) or scaled from
 * surface space into @output_rect.
 */
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        /* Scale the subpicture rectangle from surface space to output space. */
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    /* Normalized texture coordinates of the subpicture source rectangle. */
    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}
437
438 static void
439 i965_render_upload_vertex(
440     VADriverContextP   ctx,
441     struct object_surface *obj_surface,
442     const VARectangle *src_rect,
443     const VARectangle *dst_rect
444 )
445 {
446     struct i965_driver_data *i965 = i965_driver_data(ctx);
447     struct i965_render_state *render_state = &i965->render_state;
448     struct intel_region *dest_region = render_state->draw_region;
449     float tex_coords[4], vid_coords[4];
450     int width, height;
451
452     width  = obj_surface->orig_width;
453     height = obj_surface->orig_height;
454
455     tex_coords[0] = (float)src_rect->x / width;
456     tex_coords[1] = (float)src_rect->y / height;
457     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
458     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
459
460     vid_coords[0] = dest_region->x + dst_rect->x;
461     vid_coords[1] = dest_region->y + dst_rect->y;
462     vid_coords[2] = vid_coords[0] + dst_rect->width;
463     vid_coords[3] = vid_coords[1] + dst_rect->height;
464
465     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
466 }
467
/*
 * Emit a drawing rectangle covering the entire destination region.
 * DW2 packs the inclusive maximum coordinates: x_max in the low 16 bits,
 * y_max in the high 16 bits (<< binds tighter than |, so the expression
 * below is (width-1) | ((height-1) << 16) as intended).
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}
483
/*
 * Upload the image's palette to the sampler palette via
 * CMD_SAMPLER_PALETTE_LOAD.  Each entry combines the caller-supplied
 * @alpha (top 8 bits) with the 24-bit color from obj_image->palette.
 * No-op when the image has no palette entries.
 */
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    /* Release-build guard: assert() compiles out under NDEBUG. */
    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /*fill palette*/
    //int32_t out[16]; //0-23:color 23-31:alpha
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}
511
/*
 * Clear the destination region with a solid blit (GEN8 XY_COLOR_BLT).
 * BR13 carries the raster op (0xf0 = PATCOPY), the color depth and the
 * pitch; for tiled targets the command takes the pitch in dwords, hence
 * the divide by 4.  The two trailing zero dwords follow the reloc —
 * NOTE(review): presumably the upper address dword and the solid fill
 * color (black); confirm against the Gen8 blitter command layout.
 */
static void
gen8_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
557
558
559 /*
560  * for GEN8
561  */
562 #define ALIGNMENT       64
563
564 static void
565 gen8_render_initialize(VADriverContextP ctx)
566 {
567     struct i965_driver_data *i965 = i965_driver_data(ctx);
568     struct i965_render_state *render_state = &i965->render_state;
569     dri_bo *bo;
570     int size;
571     unsigned int end_offset;
572
573     /* VERTEX BUFFER */
574     dri_bo_unreference(render_state->vb.vertex_buffer);
575     bo = dri_bo_alloc(i965->intel.bufmgr,
576                       "vertex buffer",
577                       4096,
578                       4096);
579     assert(bo);
580     render_state->vb.vertex_buffer = bo;
581
582     /* WM */
583     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
584     bo = dri_bo_alloc(i965->intel.bufmgr,
585                       "surface state & binding table",
586                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
587                       4096);
588     assert(bo);
589     render_state->wm.surface_state_binding_table_bo = bo;
590
591     render_state->curbe_size = 256;
592
593     render_state->wm.sampler_count = 0;
594
595     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
596
597     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
598
599     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
600
601     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
602                         16 * sizeof(struct gen8_blend_state_rt);
603
604     render_state->sf_clip_size = 1024;
605
606     render_state->scissor_size = 1024;
607
608     size = ALIGN(render_state->curbe_size, ALIGNMENT) +
609         ALIGN(render_state->sampler_size, ALIGNMENT) +
610         ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
611         ALIGN(render_state->cc_state_size, ALIGNMENT) +
612         ALIGN(render_state->blend_state_size, ALIGNMENT) +
613         ALIGN(render_state->sf_clip_size, ALIGNMENT) +
614         ALIGN(render_state->scissor_size, ALIGNMENT);
615
616     dri_bo_unreference(render_state->dynamic_state.bo);
617     bo = dri_bo_alloc(i965->intel.bufmgr,
618                       "dynamic_state",
619                       size,
620                       4096);
621
622     render_state->dynamic_state.bo = bo;
623
624     end_offset = 0;
625     render_state->dynamic_state.end_offset = 0;
626
627     /* Constant buffer offset */
628     render_state->curbe_offset = end_offset;
629     end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
630
631     /* Sampler_state  */
632     render_state->sampler_offset = end_offset;
633     end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
634
635     /* CC_VIEWPORT_state  */
636     render_state->cc_viewport_offset = end_offset;
637     end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
638
639     /* CC_STATE_state  */
640     render_state->cc_state_offset = end_offset;
641     end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
642
643     /* Blend_state  */
644     render_state->blend_state_offset = end_offset;
645     end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
646
647     /* SF_CLIP_state  */
648     render_state->sf_clip_offset = end_offset;
649     end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
650
651     /* SCISSOR_state  */
652     render_state->scissor_offset = end_offset;
653     end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
654
655     /* update the end offset of dynamic_state */
656     render_state->dynamic_state.end_offset = end_offset;
657
658 }
659
660 static void
661 gen8_render_sampler(VADriverContextP ctx)
662 {
663     struct i965_driver_data *i965 = i965_driver_data(ctx);
664     struct i965_render_state *render_state = &i965->render_state;
665     struct gen8_sampler_state *sampler_state;
666     int i;
667     unsigned char *cc_ptr;
668
669     assert(render_state->wm.sampler_count > 0);
670     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
671
672     dri_bo_map(render_state->dynamic_state.bo, 1);
673     assert(render_state->dynamic_state.bo->virtual);
674
675     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
676                         render_state->sampler_offset;
677
678     sampler_state = (struct gen8_sampler_state *) cc_ptr;
679
680     for (i = 0; i < render_state->wm.sampler_count; i++) {
681         memset(sampler_state, 0, sizeof(*sampler_state));
682         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
683         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
684         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
685         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
686         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
687         sampler_state++;
688     }
689
690     dri_bo_unmap(render_state->dynamic_state.bo);
691 }
692
/*
 * Write the global blend state plus the first render-target blend state
 * into the dynamic-state buffer.  Blending itself is disabled; a logic op
 * of 0xc (COPY) with pre-blend clamp passes pixel-shader output through
 * unchanged.
 */
static void
gen8_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    /* Zero the whole section (global state + all 16 RT states). */
    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
719
720
721 static void
722 gen8_render_cc_viewport(VADriverContextP ctx)
723 {
724     struct i965_driver_data *i965 = i965_driver_data(ctx);
725     struct i965_render_state *render_state = &i965->render_state;
726     struct i965_cc_viewport *cc_viewport;
727     unsigned char *cc_ptr;
728
729     dri_bo_map(render_state->dynamic_state.bo, 1);
730     assert(render_state->dynamic_state.bo->virtual);
731
732     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
733                         render_state->cc_viewport_offset;
734
735     cc_viewport = (struct i965_cc_viewport *) cc_ptr;
736
737     memset(cc_viewport, 0, sizeof(*cc_viewport));
738
739     cc_viewport->min_depth = -1.e35;
740     cc_viewport->max_depth = 1.e35;
741
742     dri_bo_unmap(render_state->dynamic_state.bo);
743 }
744
745 static void
746 gen8_render_color_calc_state(VADriverContextP ctx)
747 {
748     struct i965_driver_data *i965 = i965_driver_data(ctx);
749     struct i965_render_state *render_state = &i965->render_state;
750     struct gen6_color_calc_state *color_calc_state;
751     unsigned char *cc_ptr;
752
753     dri_bo_map(render_state->dynamic_state.bo, 1);
754     assert(render_state->dynamic_state.bo->virtual);
755
756     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
757                         render_state->cc_state_offset;
758
759     color_calc_state = (struct gen6_color_calc_state *) cc_ptr;
760
761     memset(color_calc_state, 0, sizeof(*color_calc_state));
762     color_calc_state->constant_r = 1.0;
763     color_calc_state->constant_g = 0.0;
764     color_calc_state->constant_b = 1.0;
765     color_calc_state->constant_a = 1.0;
766     dri_bo_unmap(render_state->dynamic_state.bo);
767 }
768
769 #define PI  3.1415926
770
/*
 * Fill the pixel-shader constant buffer (CURBE) in the dynamic-state bo.
 * Layout (as written below):
 *   short[0]: plane mode — 2 = grayscale (Y800), 1 = NV12 interleaved
 *             chroma, 0 = fully planar
 *   short[1]: 1 = skip the color-balance transform (all attributes at
 *             their defaults), 0 = apply it
 *   float[4..7]:  contrast, brightness, cos(hue)*c*s, sin(hue)*c*s
 *   float[8..19]: 3x4 YUV->RGB matrix chosen by the VA_SRC_* color flag
 *                 (BT.709 / SMPTE 240M / default BT.601)
 * The shader-side interpretation lives in the .g8b programs.
 */
static void
gen8_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    /* Color-balance coefficients at float offset 4 (byte offset 16). */
    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    /* YUV->RGB matrix at float offset 8 (byte offset 32). */
    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->dynamic_state.bo);
}
832
/*
 * Prepare all render state for drawing obj_surface into the draw region.
 * Order matters: gen8_render_src_surfaces_state() increments
 * wm.sampler_count for each bound plane, and gen8_render_sampler()
 * asserts on that count, so surfaces must be bound before samplers.
 */
static void
gen8_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_render_src_surfaces_state(ctx, obj_surface, flags);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_render_blend_state(ctx);
    gen8_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
851
/*
 * Emit the 16-dword STATE_BASE_ADDRESS packet that tells the GPU where the
 * surface-state/binding-table, dynamic-state and instruction buffers live.
 * General and indirect-object bases are left at 0 with BASE_ADDRESS_MODIFY;
 * each base used points at the corresponding render_state BO via OUT_RELOC.
 */
static void
gen8_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 16);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
        OUT_BATCH(batch, 0);
        OUT_BATCH(batch, 0);
        /*DW4 */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
        OUT_BATCH(batch, 0);

        /*DW6*/
    /* Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW8*/
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, 0);

        /*DW10 */
    /* Instruction base address */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
    ADVANCE_BATCH(batch);
}
890
/*
 * Point the pipeline at the color-calc and blend state previously written
 * into the dynamic-state BO. The "+ 1" sets the low valid/modify bit of the
 * pointer dword (offsets are 64-byte aligned so low bits are free).
 */
static void
gen8_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);

}
909
/*
 * Bind the vertex buffer (3 vertices of 4 floats = 16-byte stride,
 * 48 bytes total), force RECTLIST topology via 3DSTATE_VF_TOPOLOGY,
 * and kick the draw with a single-instance 3DPRIMITIVE of 3 vertices.
 */
static void
gen8_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 12 * 4); /* buffer size: 3 vertices * 4 dwords */
    ADVANCE_BATCH(batch);

    /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
947
948 static void
949 gen8_emit_vertex_element_state(VADriverContextP ctx)
950 {
951     struct i965_driver_data *i965 = i965_driver_data(ctx);
952     struct intel_batchbuffer *batch = i965->batch;
953
954     /*
955      * The VUE layout
956      * dword 0-3: pad (0, 0, 0. 0)
957      * dword 4-7: position (x, y, 1.0, 1.0),
958      * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
959      */
960
961     /* Set up our vertex elements, sourced from the single vertex buffer. */
962     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));
963
964     /* Element state 0. These are 4 dwords of 0 required for the VUE format.
965      * We don't really know or care what they do.
966      */
967
968     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
969               GEN8_VE0_VALID |
970               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
971               (0 << VE0_OFFSET_SHIFT));
972     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
973               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
974               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
975               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
976
977     /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
978     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
979               GEN8_VE0_VALID |
980               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
981               (8 << VE0_OFFSET_SHIFT));
982     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
983               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
984               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
985               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
986
987     /* offset 0: u,v -> {U, V, 1.0, 1.0} */
988     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
989               GEN8_VE0_VALID |
990               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
991               (0 << VE0_OFFSET_SHIFT));
992     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
993               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
994               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
995               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
996 }
997
/*
 * Put the vertex-shader stage into pass-through: the VS constant buffer,
 * the VS kernel pointer and dispatch flags, and the VS binding-table and
 * sampler pointers are all emitted as zero.
 */
static void
gen8_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

}
1048
1049 /*
1050  * URB layout on GEN8
1051  * ----------------------------------------
1052  * | PS Push Constants (8KB) | VS entries |
1053  * ----------------------------------------
1054  */
/*
 * Program URB allocation: no push constants for VS/DS/HS/GS, an 8KB push
 * constant range (at offset 0) for PS, and 64 VS URB entries (the hardware
 * minimum) starting after the push-constant space. GS/HS/DS get zero-size
 * URB allocations at increasing starting addresses.
 */
static void
gen8_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 64;

    /* The minimum urb entries is 64 */

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Size is 8Kbs and base address is 0Kb */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    /* Size is 8Kbs and base address is 0Kb */
    OUT_BATCH(batch,
                (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
                (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);
}
1122
/*
 * Disable every geometry stage the render blit does not use: GS, HS, TE,
 * DS and stream-out are all programmed with zeroed payloads (no kernels,
 * no constants, no binding tables, no samplers), putting them in
 * pass-through/disabled mode.
 */
static void
gen8_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /*DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /*DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag.*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1268
/*
 * Emit pipeline-invariant setup: select the 3D pipeline, program 1x
 * multisampling (center pixel location, one sample/pixel), a zeroed sample
 * pattern, a sample mask of 1, and a null system instruction pointer.
 * (Function name keeps the historical "invarient" spelling; callers use it.)
 */
static void
gen8_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1311
1312 static void
1313 gen8_emit_clip_state(VADriverContextP ctx)
1314 {
1315     struct i965_driver_data *i965 = i965_driver_data(ctx);
1316     struct intel_batchbuffer *batch = i965->batch;
1317
1318     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1319     OUT_BATCH(batch, 0);
1320     OUT_BATCH(batch, 0); /* pass-through */
1321     OUT_BATCH(batch, 0);
1322 }
1323
/*
 * Program the rasterizer/setup stages: RASTER with culling disabled, SBE
 * forwarding one attribute (read length 1 at URB read offset 1) to the PS,
 * a zeroed SBE swizzle table, and 3DSTATE_SF with trifan provoking vertex 2.
 */
static void
gen8_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}
1373
/*
 * Program the pixel-shader stage for the given kernel (PS_KERNEL for plain
 * rendering, PS_SUBPIC_KERNEL for alpha-blended subpictures): PS extra/blend
 * state, WM barycentric mode, the PS push-constant buffer (CURBE), the PS
 * kernel pointer with thread/dispatch configuration, and the PS binding
 * table pointer.
 *
 * kernel: index into render_state->render_kernels; also selects the
 *         PSBLEND configuration (subpic enables SRC_ALPHA blending).
 */
static void
gen8_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    /* two threads are reserved from the device maximum -- TODO confirm why */
    max_threads = i965->intel.device_info->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    if (kernel == PS_KERNEL) {
        /* plain rendering: opaque write to the render target */
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        /* subpicture: blend with SRC_ALPHA / INV_SRC_ALPHA factors */
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                 GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /*DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /*DW5-10. Constant buffer 1-3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flag .Binding table cnt/sample cnt */
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    /* DW4-5. Scatch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads. */
    OUT_BATCH(batch,
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1465
/*
 * Emit null depth/stencil attachment state: a DEPTH_BUFFER packet with a
 * NULL surface type, zeroed hierarchical-depth and stencil buffers, and
 * zeroed clear parameters. The blit does no depth testing.
 */
static void
gen8_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the Hier Depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1511
/* Disable depth/stencil testing: WM_DEPTH_STENCIL with all-zero payload. */
static void
gen8_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1524
/* Emit 3DSTATE_WM_HZ_OP with a zeroed payload (no HiZ operation requested). */
static void
gen8_emit_wm_hz_op(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1539
/*
 * Point the pipeline at the CC viewport written earlier into the dynamic
 * state BO; the SF/CLIP viewport pointer is left at 0 (none programmed).
 */
static void
gen8_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1557
/* Point the PS stage at the sampler state in the dynamic-state BO. */
static void
gen8_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}
1570
1571
/* Thin wrapper: the drawing-rectangle emission is shared with older gens. */
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1577
/*
 * Build the complete command sequence for one blit into the batch buffer,
 * bracketed by start/end_atomic (0x1000 bytes reserved). The emission order
 * below is deliberate -- base addresses and pipeline bypass must precede the
 * stage-specific state, and the draw (gen8_emit_vertices) comes last.
 *
 * kernel: PS kernel index forwarded to gen8_emit_wm_state().
 */
static void
gen8_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_emit_invarient_states(ctx);
    gen8_emit_state_base_address(ctx);
    gen8_emit_viewport_state_pointers(ctx);
    gen8_emit_urb(ctx);
    gen8_emit_cc_state_pointers(ctx);
    gen8_emit_sampler_state_pointers(ctx);
    gen8_emit_wm_hz_op(ctx);
    gen8_emit_bypass_state(ctx);
    gen8_emit_vs_state(ctx);
    gen8_emit_clip_state(ctx);
    gen8_emit_sf_state(ctx);
    gen8_emit_depth_stencil_state(ctx);
    gen8_emit_wm_state(ctx, kernel);
    gen8_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen8_emit_vertex_element_state(ctx);
    gen8_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1605
/*
 * Top-level entry for rendering a video surface: (re)initialize render
 * resources, upload all pipeline state, clear the destination region,
 * emit the command stream with the plain PS kernel, and flush the batch.
 */
static void
gen8_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen8_render_initialize(ctx);
    gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen8_clear_dest_region(ctx);
    gen8_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
1624
1625 static void
1626 gen8_subpicture_render_blend_state(VADriverContextP ctx)
1627 {
1628     struct i965_driver_data *i965 = i965_driver_data(ctx);
1629     struct i965_render_state *render_state = &i965->render_state;
1630     struct gen8_global_blend_state *global_blend_state;
1631     struct gen8_blend_state_rt *blend_state;
1632     unsigned char *cc_ptr;
1633
1634     dri_bo_map(render_state->dynamic_state.bo, 1);
1635     assert(render_state->dynamic_state.bo->virtual);
1636
1637     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1638                         render_state->blend_state_offset;
1639
1640     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
1641
1642     memset(global_blend_state, 0, render_state->blend_state_size);
1643     /* Global blend state + blend_state for Render Target */
1644     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
1645     blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
1646     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1647     blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1648     blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
1649     blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1650     blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1651     blend_state->blend0.colorbuf_blend = 1;
1652     blend_state->blend1.post_blend_clamp_enable = 1;
1653     blend_state->blend1.pre_blend_clamp_enable = 1;
1654     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
1655
1656     dri_bo_unmap(render_state->dynamic_state.bo);
1657 }
1658
1659 static void
1660 gen8_subpic_render_upload_constants(VADriverContextP ctx,
1661                                     struct object_surface *obj_surface)
1662 {
1663     struct i965_driver_data *i965 = i965_driver_data(ctx);
1664     struct i965_render_state *render_state = &i965->render_state;
1665     float *constant_buffer;
1666     float global_alpha = 1.0;
1667     unsigned int index = obj_surface->subpic_render_idx;
1668     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1669     unsigned char *cc_ptr;
1670
1671     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1672         global_alpha = obj_subpic->global_alpha;
1673     }
1674
1675
1676     dri_bo_map(render_state->dynamic_state.bo, 1);
1677     assert(render_state->dynamic_state.bo->virtual);
1678
1679     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1680                                 render_state->curbe_offset;
1681
1682     constant_buffer = (float *) cc_ptr;
1683     *constant_buffer = global_alpha;
1684
1685     dri_bo_unmap(render_state->dynamic_state.bo);
1686 }
1687
/*
 * Prepare all pipeline state for subpicture composition; the subpicture
 * variant swaps in the subpic source surface state, the alpha-blending
 * blend state, and the global-alpha constant buffer.
 */
static void
gen8_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_subpicture_render_blend_state(ctx);
    gen8_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
1705
/*
 * Top-level entry for compositing the surface's current subpicture:
 * set up state, emit the command stream with the subpicture PS kernel,
 * upload the subpicture image palette, and flush the batch.
 */
static void
gen8_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen8_render_initialize(ctx);
    gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
1726
1727 static void
1728 gen8_render_terminate(VADriverContextP ctx)
1729 {
1730     struct i965_driver_data *i965 = i965_driver_data(ctx);
1731     struct i965_render_state *render_state = &i965->render_state;
1732
1733     dri_bo_unreference(render_state->vb.vertex_buffer);
1734     render_state->vb.vertex_buffer = NULL;
1735
1736     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1737     render_state->wm.surface_state_binding_table_bo = NULL;
1738
1739     if (render_state->instruction_state.bo) {
1740         dri_bo_unreference(render_state->instruction_state.bo);
1741         render_state->instruction_state.bo = NULL;
1742     }
1743
1744     if (render_state->dynamic_state.bo) {
1745         dri_bo_unreference(render_state->dynamic_state.bo);
1746         render_state->dynamic_state.bo = NULL;
1747     }
1748
1749     if (render_state->indirect_state.bo) {
1750         dri_bo_unreference(render_state->indirect_state.bo);
1751         render_state->indirect_state.bo = NULL;
1752     }
1753
1754     if (render_state->draw_region) {
1755         dri_bo_unreference(render_state->draw_region->bo);
1756         free(render_state->draw_region);
1757         render_state->draw_region = NULL;
1758     }
1759 }
1760
1761 bool
1762 gen8_render_init(VADriverContextP ctx)
1763 {
1764     struct i965_driver_data *i965 = i965_driver_data(ctx);
1765     struct i965_render_state *render_state = &i965->render_state;
1766     int i, kernel_size;
1767     unsigned int kernel_offset, end_offset;
1768     unsigned char *kernel_ptr;
1769     struct i965_kernel *kernel;
1770
1771     render_state->render_put_surface = gen8_render_put_surface;
1772     render_state->render_put_subpicture = gen8_render_put_subpicture;
1773     render_state->render_terminate = gen8_render_terminate;
1774
1775     memcpy(render_state->render_kernels, render_kernels_gen8,
1776            sizeof(render_state->render_kernels));
1777
1778     kernel_size = 4096;
1779
1780     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1781         kernel = &render_state->render_kernels[i];
1782
1783         if (!kernel->size)
1784             continue;
1785
1786         kernel_size += kernel->size;
1787     }
1788
1789     render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1790                                   "kernel shader",
1791                                   kernel_size,
1792                                   0x1000);
1793     if (render_state->instruction_state.bo == NULL) {
1794         WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1795         return false;
1796     }
1797
1798     assert(render_state->instruction_state.bo);
1799
1800     render_state->instruction_state.bo_size = kernel_size;
1801     render_state->instruction_state.end_offset = 0;
1802     end_offset = 0;
1803
1804     dri_bo_map(render_state->instruction_state.bo, 1);
1805     kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
1806     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1807         kernel = &render_state->render_kernels[i];
1808         kernel_offset = end_offset;
1809         kernel->kernel_offset = kernel_offset;
1810
1811         if (!kernel->size)
1812             continue;
1813
1814         memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1815
1816         end_offset += ALIGN(kernel->size, ALIGNMENT);
1817     }
1818
1819     render_state->instruction_state.end_offset = end_offset;
1820
1821     dri_bo_unmap(render_state->instruction_state.bo);
1822
1823     return true;
1824 }