Remove max_wm_threads from render_state
[platform/upstream/libva-intel-driver.git] / src / gen8_render.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *    Zhao Yakui <yakui.zhao@intel.com>
28  *
29  */
30
31 /*
32  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
33  */
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40
41 #include <va/va_drmcommon.h>
42
43 #include "intel_batchbuffer.h"
44 #include "intel_driver.h"
45 #include "i965_defines.h"
46 #include "i965_drv_video.h"
47 #include "i965_structs.h"
48
49 #include "i965_render.h"
50
51 #define SF_KERNEL_NUM_GRF       16
52 #define SF_MAX_THREADS          1
53
54 #define PS_KERNEL_NUM_GRF       48
55 #define PS_MAX_THREADS          32
56
57 /* Programs for Gen8 */
58 static const uint32_t sf_kernel_static_gen8[][4] ={
59
60 };
61 static const uint32_t ps_kernel_static_gen8[][4] = {
62 #include "shaders/render/exa_wm_src_affine.g8b"
63 #include "shaders/render/exa_wm_src_sample_planar.g8b"
64 #include "shaders/render/exa_wm_yuv_color_balance.g8b"
65 #include "shaders/render/exa_wm_yuv_rgb.g8b"
66 #include "shaders/render/exa_wm_write.g8b"
67 };
68
69 static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
70 #include "shaders/render/exa_wm_src_affine.g8b"
71 #include "shaders/render/exa_wm_src_sample_argb.g8b"
72 #include "shaders/render/exa_wm_write.g8b"
73 };
74
75
76 #define SURFACE_STATE_PADDED_SIZE       SURFACE_STATE_PADDED_SIZE_GEN8
77
78 #define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
79 #define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
80
81 enum {
82     SF_KERNEL = 0,
83     PS_KERNEL,
84     PS_SUBPIC_KERNEL
85 };
86
87 static struct i965_kernel render_kernels_gen8[] = {
88     {
89         "SF",
90         SF_KERNEL,
91         sf_kernel_static_gen8,
92         sizeof(sf_kernel_static_gen8),
93         NULL
94     },
95     {
96         "PS",
97         PS_KERNEL,
98         ps_kernel_static_gen8,
99         sizeof(ps_kernel_static_gen8),
100         NULL
101     },
102
103     {
104         "PS_SUBPIC",
105         PS_SUBPIC_KERNEL,
106         ps_subpic_kernel_static_gen8,
107         sizeof(ps_subpic_kernel_static_gen8),
108         NULL
109     }
110 };
111
112 #define URB_VS_ENTRIES        8
113 #define URB_VS_ENTRY_SIZE     1
114
115 #define URB_GS_ENTRIES        0
116 #define URB_GS_ENTRY_SIZE     0
117
118 #define URB_CLIP_ENTRIES      0
119 #define URB_CLIP_ENTRY_SIZE   0
120
121 #define URB_SF_ENTRIES        1
122 #define URB_SF_ENTRY_SIZE     2
123
124 #define URB_CS_ENTRIES        4
125 #define URB_CS_ENTRY_SIZE     4
126
127 static float yuv_to_rgb_bt601[3][4] = {
128 {1.164,         0,      1.596,          -0.06275,},
129 {1.164,         -0.392, -0.813,         -0.50196,},
130 {1.164,         2.017,  0,              -0.50196,},
131 };
132
133 static float yuv_to_rgb_bt709[3][4] = {
134 {1.164,         0,      1.793,          -0.06275,},
135 {1.164,         -0.213, -0.533,         -0.50196,},
136 {1.164,         2.112,  0,              -0.50196,},
137 };
138
139 static float yuv_to_rgb_smpte_240[3][4] = {
140 {1.164,         0,      1.794,          -0.06275,},
141 {1.164,         -0.258, -0.5425,        -0.50196,},
142 {1.164,         2.078,  0,              -0.50196,},
143 };
144
145
146 static void
147 gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
148 {
149    switch (tiling) {
150    case I915_TILING_NONE:
151       ss->ss0.tiled_surface = 0;
152       ss->ss0.tile_walk = 0;
153       break;
154    case I915_TILING_X:
155       ss->ss0.tiled_surface = 1;
156       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
157       break;
158    case I915_TILING_Y:
159       ss->ss0.tiled_surface = 1;
160       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
161       break;
162    }
163 }
164
165 /* Set "Shader Channel Select" for GEN8+ */
166 void
167 gen8_render_set_surface_scs(struct gen8_surface_state *ss)
168 {
169     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
170     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
171     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
172     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
173 }
174
/*
 * Fill in a GEN8 SURFACE_STATE structure for a 2D surface.
 *
 * ss     - zero-filled and programmed in place
 * bo     - backing buffer object; its presumed GPU offset seeds ss8.base_addr
 *          (the caller must still emit a relocation for ss8)
 * offset - byte offset of the surface within bo
 * width/height/pitch - surface dimensions in pixels / bytes
 * format - an I965_SURFACEFORMAT_* value
 * flags  - I965_PP_FLAG_TOP_FIELD / I965_PP_FLAG_BOTTOM_FIELD select
 *          field-interlaced addressing (halves the effective height)
 */
static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        /* Bottom field: same as top field but start one line down. */
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        /* Sample every other line; the surface is effectively half-height. */
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    /* Hardware expects the absolute graphics address; bo->offset is the
     * presumed address patched later via relocation. */
    ss->ss8.base_addr = bo->offset + offset;

    /* Width/height/pitch fields are encoded minus one per the B-spec. */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set 1(align 4 mode) per B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}
219
/*
 * Write the surface state for source surface `index` into the shared
 * surface-state/binding-table buffer, emit the relocation for its base
 * address (ss8), and point binding-table entry `index` at it.
 * Also bumps wm.sampler_count: one sampler is programmed per source
 * surface bound through this path.
 */
static void
gen8_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  region, offset,
                                  w, h,
                                  pitch, format, flags);
    gen8_render_set_surface_scs(ss);
    /* Patch the 64-bit base address in ss8 to point at `region` + offset. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      region);

    /* Binding table entry holds the byte offset of the surface state. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
259
/*
 * Bind the planes of a video surface as source textures.
 * Binding-table layout (each plane is bound twice, at consecutive indices):
 *   1,2 = Y plane; NV12: 3,4 = interleaved UV; planar: 3,4 = U, 5,6 = V.
 * Note: region_pitch is taken from obj_surface->width (the aligned
 * allocation width), not orig_width.
 */
static void
gen8_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        /* NV12: single chroma plane with interleaved U/V, sampled as RG. */
        gen8_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        gen8_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        /* Planar YUV: separate U and V planes, each sampled as R8. */
        gen8_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        gen8_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        gen8_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        gen8_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}
307
308 static void
309 gen8_subpic_render_src_surfaces_state(VADriverContextP ctx,
310                                       struct object_surface *obj_surface)
311 {
312     dri_bo *subpic_region;
313     unsigned int index = obj_surface->subpic_render_idx;
314     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
315     struct object_image *obj_image = obj_subpic->obj_image;
316
317     assert(obj_surface);
318     assert(obj_surface->bo);
319     subpic_region = obj_image->bo;
320     /*subpicture surface*/
321     gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
322     gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
323 }
324
/*
 * Write the surface state for the render-target (the drawable region) at
 * binding-table slot `index`, choosing B5G6R5 or B8G8R8A8 from the region's
 * bytes-per-pixel, and emit the base-address relocation with render-domain
 * read/write access.
 */
static void
gen8_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    /* 16bpp drawables are RGB565; everything else is treated as ARGB8888. */
    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  dest_region->bo, 0,
                                  dest_region->width, dest_region->height,
                                  dest_region->pitch, format, 0);
    gen8_render_set_surface_scs(ss);
    /* Render target is both read and written by the render domain. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      dest_region->bo);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
360
/*
 * Upload three vertices (bottom-right, bottom-left, top-left of the
 * destination rectangle) into the vertex buffer.  Each vertex is
 * (tex_u, tex_v, screen_x, screen_y).  The texture coordinates are
 * permuted per the current rotation attribute so the source appears
 * rotated while the screen-space triangle stays fixed.
 */
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    /* For each rotation: which tex_coords component feeds each of the
     * three emitted vertices (u,v pairs in emit order). */
    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}
400
/*
 * Compute texture and screen coordinates for blending the current
 * subpicture and upload them as vertices.  Unless the subpicture uses
 * screen coordinates directly, its dst_rect is scaled from surface space
 * into the output rectangle.
 */
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        /* Scale the subpicture's destination from surface coordinates
         * into the output rectangle. */
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    /* Normalized source-rect corners within the subpicture image. */
    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}
434
435 static void
436 i965_render_upload_vertex(
437     VADriverContextP   ctx,
438     struct object_surface *obj_surface,
439     const VARectangle *src_rect,
440     const VARectangle *dst_rect
441 )
442 {
443     struct i965_driver_data *i965 = i965_driver_data(ctx);
444     struct i965_render_state *render_state = &i965->render_state;
445     struct intel_region *dest_region = render_state->draw_region;
446     float tex_coords[4], vid_coords[4];
447     int width, height;
448
449     width  = obj_surface->orig_width;
450     height = obj_surface->orig_height;
451
452     tex_coords[0] = (float)src_rect->x / width;
453     tex_coords[1] = (float)src_rect->y / height;
454     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
455     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
456
457     vid_coords[0] = dest_region->x + dst_rect->x;
458     vid_coords[1] = dest_region->y + dst_rect->y;
459     vid_coords[2] = vid_coords[0] + dst_rect->width;
460     vid_coords[3] = vid_coords[1] + dst_rect->height;
461
462     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
463 }
464
/*
 * Emit 3DSTATE_DRAWING_RECTANGLE covering the whole drawable:
 * clip min at (0,0), clip max at (width-1, height-1), zero origin.
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    /* ymax in bits 31:16, xmax in bits 15:0 (inclusive coordinates). */
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}
480
/*
 * Load the image's palette into the sampler palette via
 * CMD_SAMPLER_PALETTE_LOAD, combining each RGB entry with the given
 * 8-bit alpha in bits 31:24.  No-op for images without a palette.
 */
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    /* Defensive duplicate of the assert for non-debug builds. */
    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /*fill palette*/
    //int32_t out[16]; //0-23:color 23-31:alpha
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}
508
/*
 * Clear the drawable region to black with an XY_COLOR_BLT on the BLT
 * ring: pick the color-depth and tiling bits from the region, then emit
 * the 7-dword blit (cmd, BR13, top-left, bottom-right, dst reloc, color,
 * pad) inside an atomic BLT batch section.
 */
static void
gen8_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* raster op 0xf0 = PATCOPY (solid fill) */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    /* Tiled destinations take the pitch in dwords, not bytes. */
    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0); /* fill color: black */
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
554
555
556 /*
557  * for GEN8
558  */
559 #define ALIGNMENT       64
560
561 static void
562 gen8_render_initialize(VADriverContextP ctx)
563 {
564     struct i965_driver_data *i965 = i965_driver_data(ctx);
565     struct i965_render_state *render_state = &i965->render_state;
566     dri_bo *bo;
567     int size;
568     unsigned int end_offset;
569
570     /* VERTEX BUFFER */
571     dri_bo_unreference(render_state->vb.vertex_buffer);
572     bo = dri_bo_alloc(i965->intel.bufmgr,
573                       "vertex buffer",
574                       4096,
575                       4096);
576     assert(bo);
577     render_state->vb.vertex_buffer = bo;
578
579     /* WM */
580     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
581     bo = dri_bo_alloc(i965->intel.bufmgr,
582                       "surface state & binding table",
583                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
584                       4096);
585     assert(bo);
586     render_state->wm.surface_state_binding_table_bo = bo;
587
588     render_state->curbe_size = 256;
589
590     render_state->wm.sampler_count = 0;
591
592     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
593
594     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
595
596     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
597
598     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
599                         16 * sizeof(struct gen8_blend_state_rt);
600
601     render_state->sf_clip_size = 1024;
602
603     render_state->scissor_size = 1024;
604
605     size = ALIGN(render_state->curbe_size, ALIGNMENT) +
606         ALIGN(render_state->sampler_size, ALIGNMENT) +
607         ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
608         ALIGN(render_state->cc_state_size, ALIGNMENT) +
609         ALIGN(render_state->blend_state_size, ALIGNMENT) +
610         ALIGN(render_state->sf_clip_size, ALIGNMENT) +
611         ALIGN(render_state->scissor_size, ALIGNMENT);
612
613     dri_bo_unreference(render_state->dynamic_state.bo);
614     bo = dri_bo_alloc(i965->intel.bufmgr,
615                       "dynamic_state",
616                       size,
617                       4096);
618
619     render_state->dynamic_state.bo = bo;
620
621     end_offset = 0;
622     render_state->dynamic_state.end_offset = 0;
623
624     /* Constant buffer offset */
625     render_state->curbe_offset = end_offset;
626     end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
627
628     /* Sampler_state  */
629     render_state->sampler_offset = end_offset;
630     end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
631
632     /* CC_VIEWPORT_state  */
633     render_state->cc_viewport_offset = end_offset;
634     end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
635
636     /* CC_STATE_state  */
637     render_state->cc_state_offset = end_offset;
638     end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
639
640     /* Blend_state  */
641     render_state->blend_state_offset = end_offset;
642     end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
643
644     /* SF_CLIP_state  */
645     render_state->sf_clip_offset = end_offset;
646     end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
647
648     /* SCISSOR_state  */
649     render_state->scissor_offset = end_offset;
650     end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
651
652     /* update the end offset of dynamic_state */
653     render_state->dynamic_state.end_offset = end_offset;
654
655 }
656
657 static void
658 gen8_render_sampler(VADriverContextP ctx)
659 {
660     struct i965_driver_data *i965 = i965_driver_data(ctx);
661     struct i965_render_state *render_state = &i965->render_state;
662     struct gen8_sampler_state *sampler_state;
663     int i;
664     unsigned char *cc_ptr;
665
666     assert(render_state->wm.sampler_count > 0);
667     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
668
669     dri_bo_map(render_state->dynamic_state.bo, 1);
670     assert(render_state->dynamic_state.bo->virtual);
671
672     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
673                         render_state->sampler_offset;
674
675     sampler_state = (struct gen8_sampler_state *) cc_ptr;
676
677     for (i = 0; i < render_state->wm.sampler_count; i++) {
678         memset(sampler_state, 0, sizeof(*sampler_state));
679         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
680         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
681         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
682         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
683         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
684         sampler_state++;
685     }
686
687     dri_bo_unmap(render_state->dynamic_state.bo);
688 }
689
/*
 * Program the blend state area of the dynamic state buffer: a zeroed
 * global blend state followed by the first render-target blend entry,
 * set to logic-op COPY (0xc) with pre-blend clamping enabled.
 */
static void
gen8_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    /* Zero the whole area (global state + all RT entries). */
    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc; /* COPY */
    blend_state->blend1.pre_blend_clamp_enable = 1;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
716
717
718 static void
719 gen8_render_cc_viewport(VADriverContextP ctx)
720 {
721     struct i965_driver_data *i965 = i965_driver_data(ctx);
722     struct i965_render_state *render_state = &i965->render_state;
723     struct i965_cc_viewport *cc_viewport;
724     unsigned char *cc_ptr;
725
726     dri_bo_map(render_state->dynamic_state.bo, 1);
727     assert(render_state->dynamic_state.bo->virtual);
728
729     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
730                         render_state->cc_viewport_offset;
731
732     cc_viewport = (struct i965_cc_viewport *) cc_ptr;
733
734     memset(cc_viewport, 0, sizeof(*cc_viewport));
735
736     cc_viewport->min_depth = -1.e35;
737     cc_viewport->max_depth = 1.e35;
738
739     dri_bo_unmap(render_state->dynamic_state.bo);
740 }
741
742 static void
743 gen8_render_color_calc_state(VADriverContextP ctx)
744 {
745     struct i965_driver_data *i965 = i965_driver_data(ctx);
746     struct i965_render_state *render_state = &i965->render_state;
747     struct gen6_color_calc_state *color_calc_state;
748     unsigned char *cc_ptr;
749
750     dri_bo_map(render_state->dynamic_state.bo, 1);
751     assert(render_state->dynamic_state.bo->virtual);
752
753     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
754                         render_state->cc_state_offset;
755
756     color_calc_state = (struct gen6_color_calc_state *) cc_ptr;
757
758     memset(color_calc_state, 0, sizeof(*color_calc_state));
759     color_calc_state->constant_r = 1.0;
760     color_calc_state->constant_g = 0.0;
761     color_calc_state->constant_b = 1.0;
762     color_calc_state->constant_a = 1.0;
763     dri_bo_unmap(render_state->dynamic_state.bo);
764 }
765
766 #define PI  3.1415926
767
/*
 * Fill the CURBE (push-constant) area consumed by the pixel shader.
 * Layout (16-bit words unless noted):
 *   word 0        : source layout — 2 = Y800, 1 = NV12, 0 = other planar
 *   word 1        : 1 = skip color-balance transform (all attribs default)
 *   floats [4..7] : contrast, brightness, cos/sin(hue)*contrast*saturation
 *   floats [8..]  : 3x4 YUV->RGB matrix chosen from the VA_SRC_* color flag
 */
static void
gen8_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI; /* degrees -> radians */
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    /* Color-balance coefficients start at float offset 4 (byte 16). */
    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    /* YUV->RGB matrix starts at float offset 8 (byte 32). */
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->dynamic_state.bo);
}
829
/*
 * Prepare everything needed before emitting the render batch: destination
 * surface at binding-table slot 0, source planes, samplers, CC viewport,
 * color-calc and blend state, push constants, and the vertex data.
 */
static void
gen8_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_render_src_surfaces_state(ctx, obj_surface, flags);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_render_blend_state(ctx);
    gen8_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
848
/*
 * Emit STATE_BASE_ADDRESS (16 dwords on Gen8), pointing the hardware at
 * the surface-state/binding-table BO, the dynamic-state BO and the
 * instruction (kernel) BO.  General and indirect-object bases are left
 * at zero; all upper bounds are set to the 4GB-aligned maximum.
 */
static void
gen8_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 16);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
        OUT_BATCH(batch, 0);
        OUT_BATCH(batch, 0);
        /*DW4 */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
        OUT_BATCH(batch, 0);

        /*DW6*/
    /* Dynamic state base address (samplers, viewports, CC/blend, CURBE) */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW8*/
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, 0);

        /*DW10 */
    /* Instruction base address: the BO holding the PS kernels */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
    ADVANCE_BATCH(batch);
}
887
/*
 * Point the color-calc and blend fixed-function units at the state
 * previously written into the dynamic-state BO (offsets relative to the
 * dynamic state base programmed in STATE_BASE_ADDRESS).
 */
static void
gen8_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    /* "+ 1" sets the low bit — presumably the pointer-valid/enable bit;
     * NOTE(review): confirm against the Gen8 PRM command description. */
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);

}
906
/*
 * Bind the vertex buffer and kick the draw: three vertices of 16 bytes
 * each (pitch 4*4, total 12*4 bytes), drawn as a RECTLIST covering the
 * destination rectangle.
 */
static void
gen8_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); /* 16 bytes per vertex */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 12 * 4); /* buffer size: 3 vertices */
    ADVANCE_BATCH(batch);

    /* Topology in 3D primitive is overridden by the VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
944
945 static void
946 gen8_emit_vertex_element_state(VADriverContextP ctx)
947 {
948     struct i965_driver_data *i965 = i965_driver_data(ctx);
949     struct intel_batchbuffer *batch = i965->batch;
950
951     /*
952      * The VUE layout
953      * dword 0-3: pad (0, 0, 0. 0)
954      * dword 4-7: position (x, y, 1.0, 1.0),
955      * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
956      */
957
958     /* Set up our vertex elements, sourced from the single vertex buffer. */
959     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));
960
961     /* Element state 0. These are 4 dwords of 0 required for the VUE format.
962      * We don't really know or care what they do.
963      */
964
965     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
966               GEN8_VE0_VALID |
967               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
968               (0 << VE0_OFFSET_SHIFT));
969     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
970               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
971               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
972               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
973
974     /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
975     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
976               GEN8_VE0_VALID |
977               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
978               (8 << VE0_OFFSET_SHIFT));
979     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
980               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
981               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
982               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
983
984     /* offset 0: u,v -> {U, V, 1.0, 1.0} */
985     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
986               GEN8_VE0_VALID |
987               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
988               (0 << VE0_OFFSET_SHIFT));
989     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
990               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
991               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
992               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
993 }
994
/*
 * Program the vertex-shader stage as a pass-through: no constant
 * buffers, no VS kernel, empty binding table and sampler pointers.
 * Vertices go straight from the vertex fetcher to the SF stage.
 */
static void
gen8_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* No surfaces bound to the (disabled) VS */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* No samplers bound to the (disabled) VS */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

}
1045
1046 /*
1047  * URB layout on GEN8
1048  * ----------------------------------------
1049  * | PS Push Constants (8KB) | VS entries |
1050  * ----------------------------------------
1051  */
1052 static void
1053 gen8_emit_urb(VADriverContextP ctx)
1054 {
1055     struct i965_driver_data *i965 = i965_driver_data(ctx);
1056     struct intel_batchbuffer *batch = i965->batch;
1057     unsigned int num_urb_entries = 64;
1058
1059     /* The minimum urb entries is 64 */
1060
1061     BEGIN_BATCH(batch, 2);
1062     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
1063     OUT_BATCH(batch, 0);
1064     ADVANCE_BATCH(batch);
1065
1066     BEGIN_BATCH(batch, 2);
1067     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
1068     OUT_BATCH(batch, 0);
1069     ADVANCE_BATCH(batch);
1070
1071     BEGIN_BATCH(batch, 2);
1072     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
1073     OUT_BATCH(batch, 0);
1074     ADVANCE_BATCH(batch);
1075
1076     BEGIN_BATCH(batch, 2);
1077     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
1078     OUT_BATCH(batch, 0);
1079     ADVANCE_BATCH(batch);
1080
1081     /* Size is 8Kbs and base address is 0Kb */
1082     BEGIN_BATCH(batch, 2);
1083     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
1084     /* Size is 8Kbs and base address is 0Kb */
1085     OUT_BATCH(batch,
1086                 (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
1087                 (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
1088     ADVANCE_BATCH(batch);
1089
1090     BEGIN_BATCH(batch, 2);
1091     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
1092     OUT_BATCH(batch,
1093               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
1094               (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
1095               (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1096    ADVANCE_BATCH(batch);
1097
1098    BEGIN_BATCH(batch, 2);
1099    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
1100    OUT_BATCH(batch,
1101              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1102              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1103    ADVANCE_BATCH(batch);
1104
1105    BEGIN_BATCH(batch, 2);
1106    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
1107    OUT_BATCH(batch,
1108              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1109              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1110    ADVANCE_BATCH(batch);
1111
1112    BEGIN_BATCH(batch, 2);
1113    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
1114    OUT_BATCH(batch,
1115              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1116              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1117    ADVANCE_BATCH(batch);
1118 }
1119
/*
 * Disable every geometry stage this pipeline does not use: GS, HS, TE,
 * DS and stream output.  Each stage gets its constant command, its main
 * state command (all-zero payload, no kernel), and zero binding-table /
 * sampler pointers where applicable.
 */
static void
gen8_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /*DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /*DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag.*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1265
/*
 * Emit states that never change for this pipeline: select the 3D pipe,
 * program single-sample multisampling with a cleared sample pattern, set
 * the sample mask to the first sample only, and zero the system
 * instruction pointer.  ("invarient" is a historical misspelling kept
 * because callers use this name.)
 */
static void
gen8_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1308
1309 static void
1310 gen8_emit_clip_state(VADriverContextP ctx)
1311 {
1312     struct i965_driver_data *i965 = i965_driver_data(ctx);
1313     struct intel_batchbuffer *batch = i965->batch;
1314
1315     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1316     OUT_BATCH(batch, 0);
1317     OUT_BATCH(batch, 0); /* pass-through */
1318     OUT_BATCH(batch, 0);
1319 }
1320
/*
 * Program the rasterizer and setup-backend stages: no culling, one
 * attribute (the texture coordinate) forwarded from the URB to the PS,
 * zeroed swizzle table, and default SF state.
 */
static void
gen8_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    /* One output attribute, read length/offset forced to 1 */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}
1370
/*
 * Program the pixel-shader/WM stage for the given render kernel.
 * PS_KERNEL (plain video rendering) only marks the RT writeable;
 * PS_SUBPIC_KERNEL additionally enables src-alpha blending in
 * 3DSTATE_PS_BLEND for subpicture composition.  The PS constant buffer
 * points at the CURBE area of the dynamic-state BO, and the kernel
 * offset indexes into the instruction BO.
 */
static void
gen8_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    /* Two threads are held back from the per-device WM maximum; the
     * field below is programmed as (max_threads - 1).
     * NOTE(review): confirm the -2 reservation against the Gen8 PRM. */
    max_threads = i965->intel.device_info->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    if (kernel == PS_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        /* Subpicture path: dst = src * alpha + dst * (1 - alpha) */
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                 GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /*DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /*DW5-10. Constant buffer 1-3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flag .Binding table cnt/sample cnt */
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    /* DW4-5. Scratch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads. */
    OUT_BATCH(batch,
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1462
/*
 * Declare that no depth, hierarchical-depth or stencil buffers are in
 * use: depth buffer is a NULL surface, the auxiliary buffers and clear
 * parameters are all zero.
 */
static void
gen8_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the Hier Depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1508
/*
 * Emit 3DSTATE_WM_DEPTH_STENCIL with an all-zero payload, leaving the
 * depth and stencil tests disabled for this pipeline.
 */
static void
gen8_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1521
1522 static void
1523 gen8_emit_wm_hz_op(VADriverContextP ctx)
1524 {
1525     struct i965_driver_data *i965 = i965_driver_data(ctx);
1526     struct intel_batchbuffer *batch = i965->batch;
1527
1528     BEGIN_BATCH(batch, 5);
1529     OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
1530     OUT_BATCH(batch, 0);
1531     OUT_BATCH(batch, 0);
1532     OUT_BATCH(batch, 0);
1533     OUT_BATCH(batch, 0);
1534     ADVANCE_BATCH(batch);
1535 }
1536
/*
 * Point the CC viewport at the entry written into the dynamic-state BO;
 * the SF/CLIP viewport pointer is left at zero (unused).
 */
static void
gen8_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1554
/*
 * Point the PS sampler state at the samplers written into the
 * dynamic-state BO by gen8_render_sampler().
 */
static void
gen8_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}
1567
1568
/* Thin wrapper: the drawing-rectangle emission is shared with the common
 * i965 render path. */
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1574
/*
 * Emit the full Gen8 3D pipeline command sequence for one render pass
 * using the given PS kernel.  The order matters: pipeline select and
 * base addresses first, then per-stage state, then vertices/primitive
 * last.  The whole sequence is built atomically so it cannot be split
 * across batch buffers.
 */
static void
gen8_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* 0x1000 reserves enough batch space for the sequence below —
     * NOTE(review): units defined by intel_batchbuffer_start_atomic. */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_emit_invarient_states(ctx);
    gen8_emit_state_base_address(ctx);
    gen8_emit_viewport_state_pointers(ctx);
    gen8_emit_urb(ctx);
    gen8_emit_cc_state_pointers(ctx);
    gen8_emit_sampler_state_pointers(ctx);
    gen8_emit_wm_hz_op(ctx);
    gen8_emit_bypass_state(ctx);
    gen8_emit_vs_state(ctx);
    gen8_emit_clip_state(ctx);
    gen8_emit_sf_state(ctx);
    gen8_emit_depth_stencil_state(ctx);
    gen8_emit_wm_state(ctx, kernel);
    gen8_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen8_emit_vertex_element_state(ctx);
    gen8_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1602
/*
 * Render src_rect of obj_surface into dst_rect of the current render
 * target: initialize render state, build indirect states, clear the
 * destination region, emit the pipeline with the video PS kernel, and
 * flush the batch to the hardware.
 */
static void
gen8_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen8_render_initialize(ctx);
    gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen8_clear_dest_region(ctx);
    gen8_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
1621
1622 static void
1623 gen8_subpicture_render_blend_state(VADriverContextP ctx)
1624 {
1625     struct i965_driver_data *i965 = i965_driver_data(ctx);
1626     struct i965_render_state *render_state = &i965->render_state;
1627     struct gen8_global_blend_state *global_blend_state;
1628     struct gen8_blend_state_rt *blend_state;
1629     unsigned char *cc_ptr;
1630
1631     dri_bo_map(render_state->dynamic_state.bo, 1);
1632     assert(render_state->dynamic_state.bo->virtual);
1633
1634     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1635                         render_state->blend_state_offset;
1636
1637     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
1638
1639     memset(global_blend_state, 0, render_state->blend_state_size);
1640     /* Global blend state + blend_state for Render Target */
1641     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
1642     blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
1643     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1644     blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1645     blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
1646     blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1647     blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1648     blend_state->blend0.colorbuf_blend = 1;
1649     blend_state->blend1.post_blend_clamp_enable = 1;
1650     blend_state->blend1.pre_blend_clamp_enable = 1;
1651     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
1652
1653     dri_bo_unmap(render_state->dynamic_state.bo);
1654 }
1655
1656 static void
1657 gen8_subpic_render_upload_constants(VADriverContextP ctx,
1658                                     struct object_surface *obj_surface)
1659 {
1660     struct i965_driver_data *i965 = i965_driver_data(ctx);
1661     struct i965_render_state *render_state = &i965->render_state;
1662     float *constant_buffer;
1663     float global_alpha = 1.0;
1664     unsigned int index = obj_surface->subpic_render_idx;
1665     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1666     unsigned char *cc_ptr;
1667
1668     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1669         global_alpha = obj_subpic->global_alpha;
1670     }
1671
1672
1673     dri_bo_map(render_state->dynamic_state.bo, 1);
1674     assert(render_state->dynamic_state.bo->virtual);
1675
1676     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1677                                 render_state->curbe_offset;
1678
1679     constant_buffer = (float *) cc_ptr;
1680     *constant_buffer = global_alpha;
1681
1682     dri_bo_unmap(render_state->dynamic_state.bo);
1683 }
1684
/*
 * Build all indirect states for a subpicture render pass: destination
 * and subpicture source surfaces, sampler, CC viewport, color-calc,
 * alpha-blend state, the global-alpha constant and the vertex data.
 */
static void
gen8_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_subpicture_render_blend_state(ctx);
    gen8_subpic_render_upload_constants(ctx, obj_surface);
    /* Vertex upload is shared with the older i965 subpicture path. */
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
1702
/*
 * Composite the subpicture currently selected on obj_surface
 * (subpic_render_idx) onto the render target: set up states, emit the
 * pipeline with the subpicture PS kernel, upload the image palette and
 * flush the batch.
 */
static void
gen8_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen8_render_initialize(ctx);
    gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
1723
1724 bool
1725 gen8_render_init(VADriverContextP ctx)
1726 {
1727     struct i965_driver_data *i965 = i965_driver_data(ctx);
1728     struct i965_render_state *render_state = &i965->render_state;
1729     int i, kernel_size;
1730     unsigned int kernel_offset, end_offset;
1731     unsigned char *kernel_ptr;
1732     struct i965_kernel *kernel;
1733
1734     render_state->render_put_surface = gen8_render_put_surface;
1735     render_state->render_put_subpicture = gen8_render_put_subpicture;
1736
1737     if (IS_GEN8(i965->intel.device_id)) {
1738         memcpy(render_state->render_kernels, render_kernels_gen8,
1739                         sizeof(render_state->render_kernels));
1740     }
1741
1742     kernel_size = 4096;
1743
1744     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1745         kernel = &render_state->render_kernels[i];
1746
1747         if (!kernel->size)
1748             continue;
1749
1750         kernel_size += kernel->size;
1751     }
1752
1753     render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1754                                   "kernel shader",
1755                                   kernel_size,
1756                                   0x1000);
1757     if (render_state->instruction_state.bo == NULL) {
1758         WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1759         return false;
1760     }
1761
1762     assert(render_state->instruction_state.bo);
1763
1764     render_state->instruction_state.bo_size = kernel_size;
1765     render_state->instruction_state.end_offset = 0;
1766     end_offset = 0;
1767
1768     dri_bo_map(render_state->instruction_state.bo, 1);
1769     kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
1770     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1771         kernel = &render_state->render_kernels[i];
1772         kernel_offset = end_offset;
1773         kernel->kernel_offset = kernel_offset;
1774
1775         if (!kernel->size)
1776             continue;
1777
1778         memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1779
1780         end_offset += ALIGN(kernel->size, ALIGNMENT);
1781     }
1782
1783     render_state->instruction_state.end_offset = end_offset;
1784
1785     dri_bo_unmap(render_state->instruction_state.bo);
1786
1787     return true;
1788 }
1789
1790
1791 void
1792 gen8_render_terminate(VADriverContextP ctx)
1793 {
1794     struct i965_driver_data *i965 = i965_driver_data(ctx);
1795     struct i965_render_state *render_state = &i965->render_state;
1796
1797     dri_bo_unreference(render_state->vb.vertex_buffer);
1798     render_state->vb.vertex_buffer = NULL;
1799
1800     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1801     render_state->wm.surface_state_binding_table_bo = NULL;
1802
1803     if (render_state->instruction_state.bo) {
1804         dri_bo_unreference(render_state->instruction_state.bo);
1805         render_state->instruction_state.bo = NULL;
1806     }
1807
1808     if (render_state->dynamic_state.bo) {
1809         dri_bo_unreference(render_state->dynamic_state.bo);
1810         render_state->dynamic_state.bo = NULL;
1811     }
1812
1813     if (render_state->indirect_state.bo) {
1814         dri_bo_unreference(render_state->indirect_state.bo);
1815         render_state->indirect_state.bo = NULL;
1816     }
1817
1818     if (render_state->draw_region) {
1819         dri_bo_unreference(render_state->draw_region->bo);
1820         free(render_state->draw_region);
1821         render_state->draw_region = NULL;
1822     }
1823 }
1824