1 /* cairo - a vector graphics library with display and print output
3 * Copyright © 2009 Kristian Høgsberg
4 * Copyright © 2009 Chris Wilson
5 * Copyright © 2009 Intel Corporation
7 * This library is free software; you can redistribute it and/or
8 * modify it either under the terms of the GNU Lesser General Public
9 * License version 2.1 as published by the Free Software Foundation
10 * (the "LGPL") or, at your option, under the terms of the Mozilla
11 * Public License Version 1.1 (the "MPL"). If you do not alter this
12 * notice, a recipient may use your version of this file under either
13 * the MPL or the LGPL.
15 * You should have received a copy of the LGPL along with this library
16 * in the file COPYING-LGPL-2.1; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA
18 * You should have received a copy of the MPL along with this library
19 * in the file COPYING-MPL-1.1
21 * The contents of this file are subject to the Mozilla Public License
22 * Version 1.1 (the "License"); you may not use this file except in
23 * compliance with the License. You may obtain a copy of the License at
24 * http://www.mozilla.org/MPL/
26 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
27 * OF ANY KIND, either express or implied. See the LGPL or the MPL for
28 * the specific language governing rights and limitations.
30 * The Original Code is the cairo graphics library.
33 * Chris Wilson <chris@chris-wilson.co.uk>
34 * Kristian Høgsberg <krh@bitplanet.net>
39 #include "cairo-error-private.h"
40 #include "cairo-drm-i965-private.h"
41 #include "cairo-surface-subsurface-private.h"
42 #include "cairo-surface-snapshot-private.h"
44 #include "cairo-drm-intel-brw-eu.h"
48 * 3 types of rectangular inputs:
49 * (a) standard composite: x,y, use source, mask matrices to compute texcoords
50 * (b) spans: x,y, alpha, use source matrix
51 * (c) glyphs: x,y, s,t, use source matrix
53 * 5 types of pixel shaders:
55 * (b) Linear gradient (via 1D texture, with precomputed tex)
56 * (c) Radial gradient (per-pixel s computation, 1D texture)
57 * (d) Spans (mask only): apply opacity
58 * (e) Texture (includes glyphs).
60 * Clip masks are limited to 2D textures only.
63 /* XXX dual source blending for LERP + ComponentAlpha!!! */
/* Kernel GRF sizing and URB (Unified Return Buffer) partitioning for the
 * gen4 (i965/CTG) fixed-function pipeline.  Entry sizes are in URB rows. */
#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)

#define SF_KERNEL_NUM_GRF 1
#define SF_MAX_THREADS 24

/* CTG (G4x) supports more concurrent pixel-shader threads than original BRW. */
#define PS_MAX_THREADS_CTG 50
#define PS_MAX_THREADS_BRW 32

#define URB_CS_ENTRY_SIZE 3 /* We need 4 matrices + 2 sources */
#define URB_CS_ENTRIES 4 /* 4x sets of CONSTANT_BUFFER */

#define URB_VS_ENTRY_SIZE 1
#define URB_VS_ENTRIES 8

/* The GS and CLIP stages are unused by this backend: zero allocation. */
#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0

#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0

#define URB_SF_ENTRY_SIZE 1
#define URB_SF_ENTRIES (SF_MAX_THREADS + 1)
/* Emit a PIPE_CONTROL write-cache flush into the current batch and clear
 * the pending batch_write_domain of every bo on the device's flush list.
 * NOTE(review): this extract is missing lines (function header, the early
 * return body, closing braces) dropped by the listing — do not treat the
 * fragment below as syntactically complete. */
i965_pipelined_flush (i965_device_t *device)
    intel_bo_t *bo, *next;

    /* Nothing queued in the batch: nothing to flush. */
    if (device->batch.used == 0)

    OUT_BATCH (BRW_PIPE_CONTROL |
	       BRW_PIPE_CONTROL_NOWRITE |
	       BRW_PIPE_CONTROL_WC_FLUSH |
    OUT_BATCH(0); /* Destination address */
    OUT_BATCH(0); /* Immediate data low DW */
    OUT_BATCH(0); /* Immediate data high DW */

    /* Walk the flush list safely (entries are unlinked as we go). */
    cairo_list_foreach_entry_safe (bo, next, intel_bo_t, &device->flush, link) {
	bo->batch_write_domain = 0;
	cairo_list_init (&bo->link);
    cairo_list_init (&device->flush);
/* Bind a solid-color pattern to a shader channel: the color is passed to
 * the fragment shader as four float constants, premultiplied by alpha
 * (the multiplications below establish this).  Always succeeds. */
static cairo_status_t
i965_shader_acquire_solid (i965_shader_t *shader,
			   union i965_shader_channel *src,
			   const cairo_solid_pattern_t *solid,
			   const cairo_rectangle_int_t *extents)
    src->type.fragment = FS_CONSTANT;
    src->type.vertex = VS_NONE;
    src->type.pattern = PATTERN_SOLID;

    src->base.content = _cairo_color_get_content (&solid->color);
    /* Premultiplied RGBA, consumed directly by the constant shader. */
    src->base.constants[0] = solid->color.red * solid->color.alpha;
    src->base.constants[1] = solid->color.green * solid->color.alpha;
    src->base.constants[2] = solid->color.blue * solid->color.alpha;
    src->base.constants[3] = solid->color.alpha;
    src->base.constants_size = 4;

    return CAIRO_STATUS_SUCCESS;
/* Bind a linear-gradient pattern: render the gradient ramp into a 1D
 * texture via intel_gradient_render(), then build a matrix that maps
 * device coordinates onto the [0,1] gradient parameter along (pd1,pd2).
 * NOTE(review): the listing dropped lines here — the declarations of
 * `sf', `x0', `y0' and `m', the error-return body, and several braces
 * are missing from this extract. */
static cairo_status_t
i965_shader_acquire_linear (i965_shader_t *shader,
			    union i965_shader_channel *src,
			    const cairo_linear_pattern_t *linear,
			    const cairo_rectangle_int_t *extents)
    intel_buffer_t buffer;
    cairo_status_t status;
    double dx, dy, offset;

    status = intel_gradient_render (&i965_device (shader->target)->intel,
				    &linear->base, &buffer);
    if (unlikely (status))

    src->type.vertex = VS_NONE;
    src->type.pattern = PATTERN_LINEAR;
    src->type.fragment = FS_LINEAR;
    src->base.bo = buffer.bo;
    src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
    src->base.format = buffer.format;
    src->base.width = buffer.width;
    src->base.height = buffer.height;
    src->base.stride = buffer.stride;
    src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR);
    src->base.extend = i965_extend (linear->base.base.extend);

    /* Project onto the gradient axis: scale by 1/|d|^2 so that the dot
     * product with (dx,dy) yields the normalized gradient parameter. */
    dx = linear->pd2.x - linear->pd1.x;
    dy = linear->pd2.y - linear->pd1.y;
    sf = 1. / (dx * dx + dy * dy);
    offset = dx*x0 + dy*y0;

    if (_cairo_matrix_is_identity (&linear->base.base.matrix)) {
	src->base.matrix.xx = dx;
	src->base.matrix.xy = dy;
	src->base.matrix.x0 = -offset;
	/* Non-identity pattern matrix: compose it with the axis projection. */
	cairo_matrix_init (&m, dx, 0, dy, 0, -offset, 0);
	cairo_matrix_multiply (&src->base.matrix, &linear->base.base.matrix, &m);
    /* The second texture coordinate is unused for a 1D gradient ramp. */
    src->base.matrix.yx = 0.;
    src->base.matrix.yy = 1.;
    src->base.matrix.y0 = 0.;

    return CAIRO_STATUS_SUCCESS;
/* Bind a radial-gradient pattern: render the color ramp into a 1D texture
 * and load the per-pixel quadratic coefficients (solved in the fragment
 * shader) into the channel constants.
 * NOTE(review): error-return body and some braces were dropped from this
 * extract by the listing. */
static cairo_status_t
i965_shader_acquire_radial (i965_shader_t *shader,
			    union i965_shader_channel *src,
			    const cairo_radial_pattern_t *radial,
			    const cairo_rectangle_int_t *extents)
    intel_buffer_t buffer;
    cairo_status_t status;
    double dx, dy, dr, r1;

    status = intel_gradient_render (&i965_device (shader->target)->intel,
				    &radial->base, &buffer);
    if (unlikely (status))

    src->type.vertex = VS_NONE;
    src->type.pattern = PATTERN_RADIAL;
    src->type.fragment = FS_RADIAL;
    src->base.bo = buffer.bo;
    src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
    src->base.format = buffer.format;
    src->base.width = buffer.width;
    src->base.height = buffer.height;
    src->base.stride = buffer.stride;
    src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR);
    src->base.extend = i965_extend (radial->base.base.extend);

    dx = radial->cd2.center.x - radial->cd1.center.x;
    dy = radial->cd2.center.y - radial->cd1.center.y;
    dr = radial->cd2.radius - radial->cd1.radius;

    r1 = radial->cd1.radius;

    /* NOTE(review): the concentric-circles fast path (RADIAL_ONE) is
     * explicitly disabled here via `FALSE &&' in the visible code. */
    if (FALSE && (radial->cd2.center.x == radial->cd1.center.x &&
		  radial->cd2.center.y == radial->cd1.center.y))
	/* XXX dr == 0, meaningless with anything other than PAD */
	src->base.constants[0] = radial->cd1.center.x / dr;
	src->base.constants[1] = radial->cd1.center.y / dr;
	src->base.constants[2] = 1. / dr;
	src->base.constants[3] = -r1 / dr;

	src->base.constants_size = 4;
	src->base.mode = RADIAL_ONE;
	/* General two-circle case: coefficients for the quadratic in t. */
	src->base.constants[0] = -radial->cd1.center.x;
	src->base.constants[1] = -radial->cd1.center.y;
	src->base.constants[2] = r1;
	src->base.constants[3] = -4 * (dx*dx + dy*dy - dr*dr);

	src->base.constants[4] = -2 * dx;
	src->base.constants[5] = -2 * dy;
	src->base.constants[6] = -2 * r1 * dr;
	src->base.constants[7] = 1 / (2 * (dx*dx + dy*dy - dr*dr));

	src->base.constants_size = 8;
	src->base.mode = RADIAL_TWO;

    return CAIRO_STATUS_SUCCESS;
/* Upload a full image surface into a new GPU surface, cache it in the
 * snapshot cache, and attach it as a snapshot of the image so repeated
 * uses hit the cached copy.  On failure the clone is destroyed.
 * NOTE(review): create_internal arguments, upload source arguments and
 * error-return statements were dropped from this extract by the listing. */
static cairo_status_t
i965_surface_clone (i965_device_t *device,
		    cairo_image_surface_t *image,
		    i965_surface_t **clone_out)
    i965_surface_t *clone;
    cairo_status_t status;

    clone = (i965_surface_t *)
	i965_surface_create_internal (&device->intel.base,
    if (unlikely (clone->intel.drm.base.status))
	return clone->intel.drm.base.status;

    status = intel_bo_put_image (&device->intel,
				 to_intel_bo (clone->intel.drm.bo),
				 image->width, image->height,
    if (unlikely (status)) {
	cairo_surface_destroy (&clone->intel.drm.base);

    status = intel_snapshot_cache_insert (&device->intel, &clone->intel);
    if (unlikely (status)) {
	cairo_surface_destroy (&clone->intel.drm.base);

    /* Remember the GPU copy on the image so future lookups can reuse it. */
    _cairo_surface_attach_snapshot (&image->base,
				    &clone->intel.drm.base,
				    intel_surface_detach_snapshot);

    return CAIRO_STATUS_SUCCESS;
/* Upload only the rectangle `extents' of an image surface into a new GPU
 * surface (used when the image is too large to clone wholesale).  Unlike
 * i965_surface_clone(), no snapshot is attached.
 * NOTE(review): create_internal arguments and the error/cleanup path were
 * dropped from this extract by the listing. */
static cairo_status_t
i965_surface_clone_subimage (i965_device_t *device,
			     cairo_image_surface_t *image,
			     const cairo_rectangle_int_t *extents,
			     i965_surface_t **clone_out)
    i965_surface_t *clone;
    cairo_status_t status;

    clone = (i965_surface_t *)
	i965_surface_create_internal (&device->intel.base,
    if (unlikely (clone->intel.drm.base.status))
	return clone->intel.drm.base.status;

    status = intel_bo_put_image (to_intel_device (clone->intel.drm.base.device),
				 to_intel_bo (clone->intel.drm.bo),
				 extents->x, extents->y,
				 extents->width, extents->height,
    if (unlikely (status))

    return CAIRO_STATUS_SUCCESS;
/* A 1x1 effective source: read the single pixel under `extents' and treat
 * it as a solid color (FS_CONSTANT).  Non-ARGB32 images are first rendered
 * into a 1x1 ARGB32 scratch surface to normalize the format.
 * NOTE(review): declarations of `argb' and `image_extra', error returns and
 * the else branches were dropped from this extract by the listing. */
static cairo_status_t
i965_shader_acquire_solid_surface (i965_shader_t *shader,
				   union i965_shader_channel *src,
				   cairo_surface_t *surface,
				   const cairo_rectangle_int_t *extents)
    cairo_image_surface_t *image;
    cairo_status_t status;

    status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
    if (unlikely (status))

    if (image->format != CAIRO_FORMAT_ARGB32) {
	cairo_surface_t *pixel;
	cairo_surface_pattern_t pattern;

	/* extract the pixel as argb32 */
	pixel = cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 1, 1);
	_cairo_pattern_init_for_surface (&pattern, &image->base);
	cairo_matrix_init_translate (&pattern.base.matrix, extents->x, extents->y);
	pattern.base.filter = CAIRO_FILTER_NEAREST;
	status = _cairo_surface_paint (pixel, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL);
	_cairo_pattern_fini (&pattern.base);

	if (unlikely (status)) {
	    _cairo_surface_release_source_image (surface, image, image_extra);
	    cairo_surface_destroy (pixel);

	argb = *(uint32_t *) ((cairo_image_surface_t *) pixel)->data;
	cairo_surface_destroy (pixel);
	/* ARGB32 already: read the pixel straight out of the image data. */
	argb = ((uint32_t *) (image->data + extents->y * image->stride))[extents->x];

    _cairo_surface_release_source_image (surface, image, image_extra);

    /* Unpack ARGB32 (A in the top byte) into float constants. */
    src->base.constants[0] = ((argb >> 16) & 0xff) / 255.;
    src->base.constants[1] = ((argb >> 8) & 0xff) / 255.;
    src->base.constants[2] = ((argb >> 0) & 0xff) / 255.;
    src->base.constants[3] = ((argb >> 24) & 0xff) / 255.;
    src->base.constants_size = 4;

    src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
    /* Opaque pixel: drop the alpha component from the declared content. */
    if (CAIRO_ALPHA_IS_OPAQUE(src->base.constants[3]))
	src->base.content &= ~CAIRO_CONTENT_ALPHA;
    src->type.fragment = FS_CONSTANT;
    src->type.vertex = VS_NONE;
    src->type.pattern = PATTERN_SOLID;

    return CAIRO_STATUS_SUCCESS;
384 static cairo_status_t
385 i965_shader_acquire_surface (i965_shader_t *shader,
386 union i965_shader_channel *src,
387 const cairo_surface_pattern_t *pattern,
388 const cairo_rectangle_int_t *extents)
390 cairo_surface_t *surface, *drm;
392 cairo_status_t status;
393 int src_x = 0, src_y = 0;
395 assert (src->type.fragment == FS_NONE);
396 drm = surface = pattern->surface;
398 if (surface->type == CAIRO_SURFACE_TYPE_DRM) {
399 if (surface->backend->type == CAIRO_SURFACE_TYPE_SUBSURFACE) {
400 drm = ((cairo_surface_subsurface_t *) surface)->target;
401 } else if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) {
402 drm = ((cairo_surface_snapshot_t *) surface)->target;
406 src->type.pattern = PATTERN_SURFACE;
407 src->surface.surface = NULL;
408 if (drm->type == CAIRO_SURFACE_TYPE_DRM) {
409 i965_surface_t *s = (i965_surface_t *) drm;
411 if (surface->backend->type == CAIRO_SURFACE_TYPE_SUBSURFACE) {
412 if (s->intel.drm.base.device == shader->target->intel.drm.base.device) {
413 cairo_surface_subsurface_t *sub = (cairo_surface_subsurface_t *) surface;
414 if (s != shader->target) {
417 if (s->intel.drm.fallback != NULL) {
418 status = intel_surface_flush (s);
419 if (unlikely (status))
423 if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
424 i965_pipelined_flush (i965_device (s));
426 src->type.fragment = FS_SURFACE;
428 src->base.bo = to_intel_bo (s->intel.drm.bo);
429 src->base.format = s->intel.drm.format;
430 src->base.content = s->intel.drm.base.content;
431 src->base.width = sub->extents.width;
432 src->base.height = sub->extents.height;
433 src->base.stride = s->intel.drm.stride;
436 if (s->intel.drm.format != CAIRO_FORMAT_A8)
439 /* XXX tiling restrictions upon offset? */
440 //src->base.offset[0] = s->offset + sub->extents.y * s->intel.drm.stride + x;
442 i965_surface_t *clone;
443 cairo_surface_pattern_t pattern;
445 clone = (i965_surface_t *)
446 i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device,
447 s->intel.drm.base.content,
452 if (unlikely (clone->intel.drm.base.status))
453 return clone->intel.drm.base.status;
455 _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base);
456 pattern.base.filter = CAIRO_FILTER_NEAREST;
457 cairo_matrix_init_translate (&pattern.base.matrix,
458 sub->extents.x, sub->extents.y);
460 status = _cairo_surface_paint (&clone->intel.drm.base,
461 CAIRO_OPERATOR_SOURCE,
465 _cairo_pattern_fini (&pattern.base);
467 if (unlikely (status)) {
468 cairo_surface_destroy (&clone->intel.drm.base);
472 i965_pipelined_flush (i965_device (s));
473 src->type.fragment = FS_SURFACE;
475 src->base.bo = to_intel_bo (clone->intel.drm.bo);
476 src->base.format = clone->intel.drm.format;
477 src->base.content = clone->intel.drm.base.content;
478 src->base.width = clone->intel.drm.width;
479 src->base.height = clone->intel.drm.height;
480 src->base.stride = clone->intel.drm.stride;
482 src->surface.surface = &clone->intel.drm.base;
485 src_x = sub->extents.x;
486 src_y = sub->extents.y;
489 if (s->intel.drm.base.device == shader->target->intel.drm.base.device) {
490 if (s != shader->target) {
491 if (s->intel.drm.fallback != NULL) {
492 status = intel_surface_flush (s);
493 if (unlikely (status))
497 if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
498 i965_pipelined_flush (i965_device (s));
500 src->type.fragment = FS_SURFACE;
502 src->base.bo = to_intel_bo (s->intel.drm.bo);
503 src->base.format = s->intel.drm.format;
504 src->base.content = s->intel.drm.base.content;
505 src->base.width = s->intel.drm.width;
506 src->base.height = s->intel.drm.height;
507 src->base.stride = s->intel.drm.stride;
509 i965_surface_t *clone;
510 cairo_surface_pattern_t pattern;
512 clone = (i965_surface_t *)
513 i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device,
514 s->intel.drm.base.content,
519 if (unlikely (clone->intel.drm.base.status))
520 return clone->intel.drm.base.status;
522 _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base);
523 pattern.base.filter = CAIRO_FILTER_NEAREST;
524 status = _cairo_surface_paint (&clone->intel.drm.base,
525 CAIRO_OPERATOR_SOURCE,
529 _cairo_pattern_fini (&pattern.base);
531 if (unlikely (status)) {
532 cairo_surface_destroy (&clone->intel.drm.base);
536 i965_pipelined_flush (i965_device (s));
537 src->type.fragment = FS_SURFACE;
539 src->base.bo = to_intel_bo (clone->intel.drm.bo);
540 src->base.format = clone->intel.drm.format;
541 src->base.content = clone->intel.drm.base.content;
542 src->base.width = clone->intel.drm.width;
543 src->base.height = clone->intel.drm.height;
544 src->base.stride = clone->intel.drm.stride;
546 src->surface.surface = &clone->intel.drm.base;
552 if (src->type.fragment == FS_NONE) {
555 if (extents->width == 1 && extents->height == 1) {
556 return i965_shader_acquire_solid_surface (shader, src,
560 s = (i965_surface_t *)
561 _cairo_surface_has_snapshot (surface,
562 shader->target->intel.drm.base.backend);
564 i965_device_t *device = i965_device (shader->target);
565 intel_bo_t *bo = to_intel_bo (s->intel.drm.bo);
568 ! intel_bo_madvise (&device->intel, bo, I915_MADV_WILLNEED))
570 _cairo_surface_detach_snapshot (&s->intel.drm.base);
575 cairo_surface_reference (&s->intel.drm.base);
579 cairo_image_surface_t *image;
581 cairo_status_t status;
583 status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
584 if (unlikely (status))
587 if (image->width < 8192 && image->height < 8192) {
588 status = i965_surface_clone (i965_device (shader->target), image, &s);
590 status = i965_surface_clone_subimage (i965_device (shader->target),
596 _cairo_surface_release_source_image (surface, image, image_extra);
598 if (unlikely (status))
602 //intel_bo_mark_purgeable (to_intel_bo (s->intel.drm.bo), TRUE);
605 src->type.fragment = FS_SURFACE;
607 src->base.bo = to_intel_bo (s->intel.drm.bo);
608 src->base.content = s->intel.drm.base.content;
609 src->base.format = s->intel.drm.format;
610 src->base.width = s->intel.drm.width;
611 src->base.height = s->intel.drm.height;
612 src->base.stride = s->intel.drm.stride;
614 src->surface.surface = &s->intel.drm.base;
616 drm = &s->intel.drm.base;
619 /* XXX transform nx1 or 1xn surfaces to 1D? */
621 src->type.vertex = VS_NONE;
623 src->base.extend = i965_extend (pattern->base.extend);
624 if (pattern->base.extend == CAIRO_EXTEND_NONE &&
625 extents->x >= 0 && extents->y >= 0 &&
626 extents->x + extents->width <= src->base.width &&
627 extents->y + extents->height <= src->base.height)
629 /* Convert a wholly contained NONE to a REFLECT as the contiguous sampler
630 * cannot not handle CLAMP_BORDER textures.
632 src->base.extend = i965_extend (CAIRO_EXTEND_REFLECT);
633 /* XXX also need to check |u,v| < 3 */
636 src->base.filter = i965_filter (pattern->base.filter);
637 if (_cairo_matrix_is_pixel_exact (&pattern->base.matrix))
638 src->base.filter = i965_filter (CAIRO_FILTER_NEAREST);
640 /* tweak the src matrix to map from dst to texture coordinates */
641 src->base.matrix = pattern->base.matrix;
643 cairo_matrix_translate (&src->base.matrix, src_x, src_x);
644 cairo_matrix_init_scale (&m, 1. / src->base.width, 1. / src->base.height);
645 cairo_matrix_multiply (&src->base.matrix, &src->base.matrix, &m);
647 return CAIRO_STATUS_SUCCESS;
/* Dispatch on the pattern type to the matching channel-acquire routine.
 * NOTE(review): the function header, the trailing argument of each call
 * (the `extents' pass-through), the default case and braces were dropped
 * from this extract by the listing. */
i965_shader_acquire_pattern (i965_shader_t *shader,
			     union i965_shader_channel *src,
			     const cairo_pattern_t *pattern,
			     const cairo_rectangle_int_t *extents)
    switch (pattern->type) {
    case CAIRO_PATTERN_TYPE_SOLID:
	return i965_shader_acquire_solid (shader, src,
					  (cairo_solid_pattern_t *) pattern,

    case CAIRO_PATTERN_TYPE_LINEAR:
	return i965_shader_acquire_linear (shader, src,
					   (cairo_linear_pattern_t *) pattern,

    case CAIRO_PATTERN_TYPE_RADIAL:
	return i965_shader_acquire_radial (shader, src,
					   (cairo_radial_pattern_t *) pattern,

    case CAIRO_PATTERN_TYPE_SURFACE:
	return i965_shader_acquire_surface (shader, src,
					    (cairo_surface_pattern_t *) pattern,

    return CAIRO_STATUS_SUCCESS;
684 i965_shader_channel_init (union i965_shader_channel *channel)
686 channel->type.vertex = VS_NONE;
687 channel->type.fragment = FS_NONE;
688 channel->type.pattern = PATTERN_NONE;
690 channel->base.mode = 0;
691 channel->base.bo = NULL;
692 channel->base.filter = i965_extend (CAIRO_FILTER_NEAREST);
693 channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);
694 channel->base.has_component_alpha = 0;
695 channel->base.constants_size = 0;
/* Initialize a shader for rendering to `dst': clear the combine/commit
 * state and reset all four channels (source, mask, clip, dst).
 * NOTE(review): the function header (return type and remaining parameters,
 * including `dst') and braces were dropped from this extract. */
i965_shader_init (i965_shader_t *shader,
    shader->committed = FALSE;
    shader->device = i965_device (dst);
    shader->target = dst;

    shader->constants_size = 0;

    shader->need_combine = FALSE;

    i965_shader_channel_init (&shader->source);
    i965_shader_channel_init (&shader->mask);
    i965_shader_channel_init (&shader->clip);
    i965_shader_channel_init (&shader->dst);
/* Release per-channel resources: each channel holding a PATTERN_SURFACE
 * owns a reference on its surface, dropped here.
 * NOTE(review): function header and braces were dropped from this extract. */
i965_shader_fini (i965_shader_t *shader)
    if (shader->source.type.pattern == PATTERN_SURFACE)
	cairo_surface_destroy (shader->source.surface.surface);
    if (shader->mask.type.pattern == PATTERN_SURFACE)
	cairo_surface_destroy (shader->mask.surface.surface);
    if (shader->clip.type.pattern == PATTERN_SURFACE)
	cairo_surface_destroy (shader->clip.surface.surface);
    if (shader->dst.type.pattern == PATTERN_SURFACE)
	cairo_surface_destroy (shader->dst.surface.surface);
/* Bind the clip's A8 mask surface to the shader's clip channel as an
 * FS_SURFACE texture, flushing pending GPU writes on its bo first, and
 * set up the matrix mapping device pixels into normalized mask texcoords.
 * NOTE(review): declarations of `s', `clip_x'/`clip_y', the `clip'
 * parameter line and the final translate arguments were dropped from this
 * extract by the listing. */
i965_shader_set_clip (i965_shader_t *shader,
    cairo_surface_t *clip_surface;
    union i965_shader_channel *channel;

    clip_surface = _cairo_clip_get_surface (clip, &shader->target->intel.drm.base, &clip_x, &clip_y);
    assert (clip_surface->status == CAIRO_STATUS_SUCCESS);
    assert (clip_surface->type == CAIRO_SURFACE_TYPE_DRM);
    s = (i965_surface_t *) clip_surface;

    if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
	i965_pipelined_flush (i965_device (s));

    channel = &shader->clip;
    channel->type.pattern = PATTERN_BASE;
    channel->type.vertex = VS_NONE;
    channel->type.fragment = FS_SURFACE;

    channel->base.bo = to_intel_bo (s->intel.drm.bo);
    channel->base.content = CAIRO_CONTENT_ALPHA;
    channel->base.format = CAIRO_FORMAT_A8;
    channel->base.width = s->intel.drm.width;
    channel->base.height = s->intel.drm.height;
    channel->base.stride = s->intel.drm.stride;

    channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);
    channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST);

    /* Device coords -> [0,1] texture coords over the clip mask. */
    cairo_matrix_init_scale (&shader->clip.base.matrix,
			     1. / s->intel.drm.width,
			     1. / s->intel.drm.height);

    cairo_matrix_translate (&shader->clip.base.matrix,
771 i965_shader_check_aperture (i965_shader_t *shader,
772 i965_device_t *device)
774 uint32_t size = device->exec.gtt_size;
776 if (shader->target != device->target) {
777 const intel_bo_t *bo = to_intel_bo (shader->target->intel.drm.bo);
778 if (bo->exec == NULL)
779 size += bo->base.size;
782 if (shader->source.base.bo != NULL && shader->source.base.bo != device->source) {
783 const intel_bo_t *bo = to_intel_bo (shader->target->intel.drm.bo);
784 if (bo->exec == NULL)
785 size += bo->base.size;
788 if (shader->mask.base.bo != NULL && shader->mask.base.bo != device->mask) {
789 const intel_bo_t *bo = to_intel_bo (shader->target->intel.drm.bo);
790 if (bo->exec == NULL)
791 size += bo->base.size;
794 if (shader->clip.base.bo != NULL && shader->clip.base.bo != device->clip) {
795 const intel_bo_t *bo = to_intel_bo (shader->target->intel.drm.bo);
796 if (bo->exec == NULL)
797 size += bo->base.size;
800 return size <= device->intel.gtt_avail_size;
/* Decide whether the destination must be read back and combined manually
 * (unbounded op with a clip, or an extended blend mode).  If so, allocate
 * a fresh render target, swap its bo with the target's, and expose the
 * original contents as the shader's dst channel.
 * NOTE(review): the assignment of `s', create_internal arguments and
 * several braces were dropped from this extract by the listing. */
static cairo_status_t
i965_shader_setup_dst (i965_shader_t *shader)
    union i965_shader_channel *channel;
    i965_surface_t *s, *clone;

    /* We need to do blending manually if we have a clip surface and an
     * unbounded op, or an extended blend mode.
     */
    if (shader->need_combine ||
	(shader->op < CAIRO_OPERATOR_SATURATE &&
	 (shader->clip.type.fragment == FS_NONE ||
	  _cairo_operator_bounded_by_mask (shader->op))))
	return CAIRO_STATUS_SUCCESS;

    shader->need_combine = TRUE;

    /* we need to allocate a new render target and use the original as a source */
    clone = (i965_surface_t *)
	i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device,
				      s->intel.drm.base.content,
    if (unlikely (clone->intel.drm.base.status))
	return clone->intel.drm.base.status;

    if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
	i965_pipelined_flush (i965_device (s));

    channel = &shader->dst;

    channel->type.vertex = VS_NONE;
    channel->type.fragment = FS_SURFACE;
    channel->type.pattern = PATTERN_SURFACE;

    /* swap buffer objects */
    channel->base.bo = to_intel_bo (s->intel.drm.bo);
    s->intel.drm.bo = ((cairo_drm_surface_t *) clone)->bo;
    ((cairo_drm_surface_t *) clone)->bo = &channel->base.bo->base;

    channel->base.content = s->intel.drm.base.content;
    channel->base.format = s->intel.drm.format;
    channel->base.width = s->intel.drm.width;
    channel->base.height = s->intel.drm.height;
    channel->base.stride = s->intel.drm.stride;

    channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST);
    channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);

    /* Sample the old contents with normalized coordinates. */
    cairo_matrix_init_scale (&channel->base.matrix,
			     1. / s->intel.drm.width,
			     1. / s->intel.drm.height);

    channel->surface.surface = &clone->intel.drm.base;

    /* The target keeps its identity but now owns the clone's storage. */
    s->intel.drm.base.content = clone->intel.drm.base.content;
    s->intel.drm.format = clone->intel.drm.format;
    assert (s->intel.drm.width == clone->intel.drm.width);
    assert (s->intel.drm.height == clone->intel.drm.height);
    s->intel.drm.stride = clone->intel.drm.stride;

    return CAIRO_STATUS_SUCCESS;
/* Append one float to the shader's flattened constant buffer.
 * NOTE(review): function header and braces were dropped from this extract. */
constant_add_float (i965_shader_t *shader, float v)
    shader->constants[shader->constants_size++] = v;
/* Append a channel's constant block to the shader's constant buffer,
 * asserting the combined size stays within the fixed-size array.
 * NOTE(review): function header and braces were dropped from this extract. */
i965_shader_copy_channel_constants (i965_shader_t *shader,
				    const union i965_shader_channel *channel)
    if (channel->base.constants_size) {
	assert (shader->constants_size + channel->base.constants_size < ARRAY_LENGTH (shader->constants));

	memcpy (shader->constants + shader->constants_size,
		channel->base.constants,
		sizeof (float) * channel->base.constants_size);
	shader->constants_size += channel->base.constants_size;
/* Emit per-channel plane-equation constants (matrix rows padded to vec4)
 * according to the channel's fragment-shader type, then append the
 * channel's own constant block.
 * NOTE(review): the switch's case labels (one row for the 1D gradient
 * types, two rows for 2D surface sampling) and braces were dropped from
 * this extract by the listing. */
i965_shader_setup_channel_constants (i965_shader_t *shader,
				     const union i965_shader_channel *channel)
    switch (channel->type.fragment) {
	/* no plane equations */

	/* One plane equation: x row only (1D texture lookup). */
	constant_add_float (shader, channel->base.matrix.xx);
	constant_add_float (shader, channel->base.matrix.xy);
	constant_add_float (shader, 0);
	constant_add_float (shader, channel->base.matrix.x0);

	/* Two plane equations: x row then y row (2D texture lookup). */
	constant_add_float (shader, channel->base.matrix.xx);
	constant_add_float (shader, channel->base.matrix.xy);
	constant_add_float (shader, 0);
	constant_add_float (shader, channel->base.matrix.x0);

	constant_add_float (shader, channel->base.matrix.yx);
	constant_add_float (shader, channel->base.matrix.yy);
	constant_add_float (shader, 0);
	constant_add_float (shader, channel->base.matrix.y0);

	/* use pue from SF */

    i965_shader_copy_channel_constants (shader, channel);
/* Gather the constant blocks of all four channels into the shader's
 * constant buffer, in the fixed order source, mask, clip, dst.
 * NOTE(review): function header and braces were dropped from this extract. */
i965_shader_setup_constants (i965_shader_t *shader)
    i965_shader_setup_channel_constants (shader, &shader->source);
    i965_shader_setup_channel_constants (shader, &shader->mask);
    i965_shader_setup_channel_constants (shader, &shader->clip);
    i965_shader_setup_channel_constants (shader, &shader->dst);
    assert (shader->constants_size < ARRAY_LENGTH (shader->constants));
/*
 * Highest-valued BLENDFACTOR used in i965_blend_op.
 *
 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR,
 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1)
/* Map the shader's cairo operator to gen4 source/destination blend
 * factors via a table indexed by operator, then strip dst-alpha factors
 * when the target has no alpha channel (treat dst alpha as 1.0).
 * NOTE(review): the function header, the blendinfo src/dst factor field
 * declarations, the dst_alpha adjustment for *dblend, and braces were
 * dropped from this extract by the listing. */
i965_shader_get_blend_cntl (const i965_shader_t *shader,
			    uint32_t *sblend, uint32_t *dblend)
    static const struct blendinfo {
	cairo_bool_t dst_alpha;   /* operator reads destination alpha */
	cairo_bool_t src_alpha;   /* operator reads source alpha */
    } i965_blend_op[] = {
	/* CAIRO_OPERATOR_CLEAR treat as SOURCE with transparent */
	{0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO},
	/* CAIRO_OPERATOR_SOURCE */
	{0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO},
	/* CAIRO_OPERATOR_OVER */
	{0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* CAIRO_OPERATOR_IN */
	{1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
	/* CAIRO_OPERATOR_OUT */
	{1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
	/* CAIRO_OPERATOR_ATOP */
	{1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},

	/* CAIRO_OPERATOR_DEST */
	{0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE},
	/* CAIRO_OPERATOR_DEST_OVER */
	{1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE},
	/* CAIRO_OPERATOR_DEST_IN */
	{0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA},
	/* CAIRO_OPERATOR_DEST_OUT */
	{0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* CAIRO_OPERATOR_DEST_ATOP */
	{1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA},
	/* CAIRO_OPERATOR_XOR */
	{1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* CAIRO_OPERATOR_ADD */
	{0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE},
    const struct blendinfo *op = &i965_blend_op[shader->op];

    *sblend = op->src_blend;
    *dblend = op->dst_blend;

    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
     * it as always 1.
     */
    if (shader->target->intel.drm.base.content == CAIRO_CONTENT_COLOR &&
	if (*sblend == BRW_BLENDFACTOR_DST_ALPHA)
	    *sblend = BRW_BLENDFACTOR_ONE;
	else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA)
	    *sblend = BRW_BLENDFACTOR_ZERO;
/* Expand the four subspan origin coordinates delivered in R1 into
 * per-pixel x/y for all 16 pixels of the dispatched block, writing into
 * GRF registers tmp..tmp+3 (x pairs then y pairs).  Runs uncompressed
 * while addressing individual halves, then restores compressed mode.
 * NOTE(review): the function header (tmp parameter), the brw_ADD opcodes
 * preceding each brw_reg operand, and trailing brw_reg arguments were
 * dropped from this extract by the listing. */
emit_wm_subpans_to_pixels (struct brw_compile *compile,
    /* R1.5 x/y of upper-left pixel of subspan 3
     * R1.4 x/y of upper-left pixel of subspan 2
     * R1.3 x/y of upper-left pixel of subspan 1
     * R1.2 x/y of upper-left pixel of subspan 0
     *
     * The pixels within the subspan are laid out:
     * upper left, upper right, lower left, lower right.
     */

    /* compute pixel locations for each subspan */
    brw_set_compression_control (compile, BRW_COMPRESSION_NONE);
	     brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 4,
		      BRW_REGISTER_TYPE_UW,
		      BRW_VERTICAL_STRIDE_2,
		      BRW_HORIZONTAL_STRIDE_0,
	     brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE));
	     brw_vec8_grf (tmp+1),
	     brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 8,
		      BRW_REGISTER_TYPE_UW,
		      BRW_VERTICAL_STRIDE_2,
		      BRW_HORIZONTAL_STRIDE_0,
	     brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE));
	     brw_vec8_grf (tmp+2),
	     brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 5,
		      BRW_REGISTER_TYPE_UW,
		      BRW_VERTICAL_STRIDE_2,
		      BRW_HORIZONTAL_STRIDE_0,
	     brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE));
	     brw_vec8_grf (tmp+3),
	     brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 9,
		      BRW_REGISTER_TYPE_UW,
		      BRW_VERTICAL_STRIDE_2,
		      BRW_HORIZONTAL_STRIDE_0,
	     brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE));
    brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED);
/* Compute affine texture coordinates for the pixel block: expand subspans
 * to pixels, then evaluate the two plane equations from the VUE at `reg'
 * into the sampler message registers starting at `msg'.
 * NOTE(review): the function header line, the brw_LINE/brw_MAC opcode
 * lines and some message-register destinations were dropped from this
 * extract by the listing. */
emit_wm_affine (struct brw_compile *compile,
		int tmp, int reg, int msg)
    emit_wm_subpans_to_pixels (compile, tmp);

	      brw_vec1_grf (reg, 0),
	      brw_vec8_grf (tmp));
	      brw_message_reg (msg + 1),
	      brw_vec1_grf (reg, 1),
	      brw_vec8_grf (tmp+2));

	      brw_vec1_grf (reg, 4),
	      brw_vec8_grf (tmp));
	      brw_message_reg (msg + 3),
	      brw_vec1_grf (reg, 5),
	      brw_vec8_grf (tmp+2));
/* Compute glyph texture coordinates: scale the interpolated s,t from the
 * VUE at `vue' by 1/1024 (the glyph-cache texture dimension used here)
 * into the sampler message registers at `msg'.
 * NOTE(review): opcode lines (brw_MUL/brw_ADD) and some operands were
 * dropped from this extract by the listing. */
emit_wm_glyph (struct brw_compile *compile,
	       int tmp, int vue, int msg)
    emit_wm_subpans_to_pixels (compile, tmp);

	     brw_imm_f (1./1024));
	     brw_message_reg (msg + 1),
	     brw_vec1_grf (vue, 0));

	     brw_vec8_grf (tmp + 2),
	     brw_imm_f (1./1024));
	     brw_message_reg (msg + 3),
	     brw_vec1_grf (vue, 1));
/* Point all eight result registers at the four constant-color components
 * held in GRF `reg' (scalar-broadcast region: stride 0), duplicating each
 * component for the high half of the pixel block.
 * NOTE(review): the `reg' parameter line, the `int n' declaration and the
 * trailing brw_reg arguments were dropped from this extract. */
emit_wm_load_constant (struct brw_compile *compile,
		       struct brw_reg *result)

    for (n = 0; n < 4; n++) {
	result[n] = result[n+4] = brw_reg (BRW_GENERAL_REGISTER_FILE, reg, n,
					   BRW_REGISTER_TYPE_F,
					   BRW_VERTICAL_STRIDE_0,
					   BRW_HORIZONTAL_STRIDE_0,
/* Point all eight result registers at the single per-pixel opacity value
 * in GRF `reg' — one scalar replicated across every channel.
 * NOTE(review): the `reg' parameter line and the trailing brw_reg
 * arguments were dropped from this extract by the listing. */
emit_wm_load_opacity (struct brw_compile *compile,
		      struct brw_reg *result)
    result[0] = result[1] = result[2] = result[3] =
	result[4] = result[5] = result[6] = result[7] =
	brw_reg (BRW_GENERAL_REGISTER_FILE, reg, 0,
		 BRW_REGISTER_TYPE_F,
		 BRW_VERTICAL_STRIDE_0,
		 BRW_HORIZONTAL_STRIDE_1,
/* Evaluate the single linear-gradient plane equation (from the constant
 * block at `reg') over the pixel block into the sampler message register,
 * producing the 1D gradient texture coordinate.
 * NOTE(review): opcode lines (brw_LINE/brw_MAC) were dropped from this
 * extract by the listing. */
emit_wm_load_linear (struct brw_compile *compile,
		     int tmp, int reg, int msg)
    emit_wm_subpans_to_pixels (compile, tmp);

	      brw_vec1_grf (reg, 0),
	      brw_vec8_grf (tmp));
	      brw_message_reg(msg + 1),
	      brw_vec1_grf (reg, 1),
	      brw_vec8_grf (tmp + 2));
/* Solve the per-pixel quadratic for the radial gradient parameter t and
 * write it to the sampler message register.  The constants layout matches
 * i965_shader_acquire_radial(): c1 center, -r1^2, circle deltas, -4A and
 * 1/(2A).
 * NOTE(review): parameter lines (tmp/reg/msg), pixel-coordinate setup and
 * the math-function destination/argument lines were dropped from this
 * extract by the listing. */
emit_wm_load_radial (struct brw_compile *compile,
    struct brw_reg c1x = brw_vec1_grf (reg, 0);
    struct brw_reg c1y = brw_vec1_grf (reg, 1);
    struct brw_reg minus_r_sq = brw_vec1_grf (reg, 3);
    struct brw_reg cdx = brw_vec1_grf (reg, 4);
    struct brw_reg cdy = brw_vec1_grf (reg, 5);
    struct brw_reg neg_4a = brw_vec1_grf (reg + 1, 0);
    struct brw_reg inv_2a = brw_vec1_grf (reg + 1, 1);

    struct brw_reg tmp_x = brw_uw16_grf (30, 0);
    struct brw_reg tmp_y = brw_uw16_grf (28, 0);
    struct brw_reg det = brw_vec8_grf (22);
    struct brw_reg b = brw_vec8_grf (20);
    struct brw_reg c = brw_vec8_grf (18);
    struct brw_reg pdx = brw_vec8_grf (16);
    struct brw_reg pdy = brw_vec8_grf (14);
    struct brw_reg t = brw_message_reg (msg + 1);

    /* cdx = (c₂x - c₁x)
     *
     * A = cdx² + cdy² - dr²
     * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
     * C = pdx² + pdy² - r₁²
     *
     * t = (-2·B ± ⎷(B² - 4·A·C)) / 2·A
     */

    /* Pixel position relative to the first circle's center. */
    brw_ADD (compile, pdx, vec8 (tmp_x), negate (c1x));
    brw_ADD (compile, pdy, vec8 (tmp_y), negate (c1y));

    /* b = cdx·pdx + cdy·pdy (via LINE+MAC accumulator pair). */
    brw_LINE (compile, brw_null_reg (), cdx, pdx);
    brw_MAC (compile, b, cdy, pdy);

    /* c = pdx² + pdy² - r₁² */
    brw_MUL (compile, brw_null_reg (), pdx, pdx);
    brw_MAC (compile, c, pdy, pdy);
    brw_ADD (compile, c, c, minus_r_sq);

    /* det = b² - 4ac */
    brw_MUL (compile, brw_null_reg (), b, b);
    brw_MAC (compile, det, neg_4a, c);

    /* XXX use rsqrt like i915?, it's faster and we need to mac anyway */
	      BRW_MATH_FUNCTION_SQRT,
	      BRW_MATH_SATURATE_NONE,
	      BRW_MATH_DATA_VECTOR,
	      BRW_MATH_PRECISION_FULL);

    /* t = (-√det - b - b) · (1 / 2A) */
    brw_ADD (compile, det, negate (det), negate (b));
    brw_ADD (compile, det, det, negate (b));
    brw_MUL (compile, t, det, inv_2a);
1235 emit_wm_sample (struct brw_compile *compile,
1236 union i965_shader_channel *channel,
1238 int msg_base, int msg_len,
1240 struct brw_reg *result)
1242 int response_len, mask;
1244 if (channel->base.content == CAIRO_CONTENT_ALPHA) {
1247 result[0] = result[1] = result[2] = result[3] = brw_vec8_grf (dst);
1248 result[4] = result[5] = result[6] = result[7] = brw_vec8_grf (dst + 1);
1252 result[0] = brw_vec8_grf (dst + 0);
1253 result[1] = brw_vec8_grf (dst + 2);
1254 result[2] = brw_vec8_grf (dst + 4);
1255 result[3] = brw_vec8_grf (dst + 6);
1256 result[4] = brw_vec8_grf (dst + 1);
1257 result[5] = brw_vec8_grf (dst + 3);
1258 result[6] = brw_vec8_grf (dst + 5);
1259 result[7] = brw_vec8_grf (dst + 7);
1262 brw_set_compression_control (compile, BRW_COMPRESSION_NONE);
1264 brw_set_mask_control (compile, BRW_MASK_DISABLE);
1266 get_element_ud (brw_vec8_grf (0), 2),
1268 brw_set_mask_control (compile, BRW_MASK_ENABLE);
1270 brw_SAMPLE (compile,
1271 brw_uw16_grf (dst, 0),
1274 sampler + 1, /* binding table */
1277 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE,
1282 brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED);
1284 return response_len;
1287 #define MAX_MSG_REGISTER 16
1290 emit_wm_load_channel (struct brw_compile *compile,
1291 union i965_shader_channel *channel,
1297 struct brw_reg *result)
1299 switch (channel->type.fragment) {
1304 emit_wm_load_constant (compile, *cue, result);
1309 emit_wm_load_radial (compile, *cue, *msg);
1312 if (*msg + 3 > MAX_MSG_REGISTER)
1315 *grf += emit_wm_sample (compile, channel, *sampler, *msg, 3, *grf, result);
1321 emit_wm_load_linear (compile, *grf, *cue, *msg);
1324 if (*msg + 3 > MAX_MSG_REGISTER)
1327 *grf += emit_wm_sample (compile, channel, *sampler, *msg, 3, *grf, result);
1333 emit_wm_affine (compile, *grf, *cue, *msg);
1336 if (*msg + 5 > MAX_MSG_REGISTER)
1339 *grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result);
1345 emit_wm_load_opacity (compile, *vue, result);
1350 emit_wm_glyph (compile, *grf, *vue, *msg);
1353 if (*msg + 5 > MAX_MSG_REGISTER)
1356 *grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result);
1363 static unsigned long
1364 i965_wm_kernel_hash (const i965_shader_t *shader)
1369 (shader->source.type.fragment & 0xff) |
1370 (shader->mask.type.fragment & 0xff) << 8 |
1371 (shader->clip.type.fragment & 0xff) << 16;
1372 if (shader->need_combine)
1373 hash |= (1 + shader->op) << 24;
1379 i965_wm_kernel_init (struct i965_wm_kernel *key,
1380 const i965_shader_t *shader)
1382 key->entry.hash = i965_wm_kernel_hash (shader);
1386 i965_shader_const_urb_length (i965_shader_t *shader)
1388 const int lengths[] = { 0, 1, 1, 4, 2, 0, 0 };
1389 int count = 0; /* 128-bit/16-byte increments */
1391 count += lengths[shader->source.type.fragment];
1392 count += lengths[shader->mask.type.fragment];
1393 count += lengths[shader->clip.type.fragment];
1394 count += lengths[shader->dst.type.fragment];
1396 return (count + 1) / 2; /* 256-bit/32-byte increments */
1400 i965_shader_pue_length (i965_shader_t *shader)
1402 return 1 + (shader->mask.type.vertex != VS_NONE);
1406 create_wm_kernel (i965_device_t *device,
1407 i965_shader_t *shader,
1410 struct brw_compile compile;
1411 struct brw_reg source[8], mask[8], clip[8], dst[8];
1412 const uint32_t *program;
1414 int msg, cue, vue, grf, sampler;
1417 struct i965_wm_kernel key, *cache;
1418 cairo_status_t status;
1421 i965_wm_kernel_init (&key, shader);
1422 cache = _cairo_hash_table_lookup (device->wm_kernels, &key.entry);
1424 return cache->offset;
1426 brw_compile_init (&compile, device->is_g4x);
1428 if (key.entry.hash == FS_CONSTANT &&
1429 to_intel_bo (shader->target->intel.drm.bo)->tiling)
1431 struct brw_instruction *insn;
1433 assert (i965_shader_const_urb_length (shader) == 1);
1434 brw_MOV (&compile, brw_message4_reg (2), brw_vec4_grf (2, 0));
1437 brw_push_insn_state (&compile);
1438 brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? */
1440 retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD),
1441 retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD));
1442 brw_pop_insn_state (&compile);
1444 insn = brw_next_instruction (&compile, BRW_OPCODE_SEND);
1445 insn->header.predicate_control = 0;
1446 insn->header.compression_control = BRW_COMPRESSION_NONE;
1447 insn->header.destreg__conditonalmod = 0;
1449 brw_instruction_set_destination (insn,
1450 retype (vec16 (brw_acc_reg ()),
1451 BRW_REGISTER_TYPE_UW));
1453 brw_instruction_set_source0 (insn,
1454 retype (brw_vec8_grf (0),
1455 BRW_REGISTER_TYPE_UW));
1457 brw_instruction_set_dp_write_message (insn,
1459 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED, /* msg_control */
1460 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1462 1, /* pixel scoreboard */
1470 vue = cue + i965_shader_const_urb_length (shader);
1471 grf = vue + i965_shader_pue_length (shader);
1474 brw_set_compression_control (&compile, BRW_COMPRESSION_COMPRESSED);
1475 emit_wm_load_channel (&compile, &shader->source,
1476 &vue, &cue, &msg, &sampler, &grf,
1478 emit_wm_load_channel (&compile, &shader->mask,
1479 &vue, &cue, &msg, &sampler, &grf,
1481 emit_wm_load_channel (&compile, &shader->clip,
1482 &vue, &cue, &msg, &sampler, &grf,
1484 emit_wm_load_channel (&compile, &shader->dst,
1485 &vue, &cue, &msg, &sampler, &grf,
1487 brw_set_compression_control (&compile, BRW_COMPRESSION_NONE);
1489 if (shader->need_combine) {
1490 if (shader->mask.type.fragment != FS_NONE &&
1491 shader->clip.type.fragment != FS_NONE)
1493 for (i = 0; i < 8; i++)
1494 brw_MUL (&compile, mask[i], mask[i], clip[i]);
1498 for (i = 0; i < 8; i++)
1499 brw_MOV (&compile, brw_message_reg (2 + i), source[i]);
1501 if (shader->mask.type.fragment != FS_NONE) {
1502 if (shader->clip.type.fragment != FS_NONE) {
1503 for (i = 0; i < 8; i++)
1504 brw_MUL (&compile, mask[i], mask[i], clip[i]);
1507 for (i = 0; i < 8; i++)
1508 brw_MUL (&compile, brw_message_reg (2 + i), source[i], mask[i]);
1510 if (shader->clip.type.fragment != FS_NONE) {
1511 for (i = 0; i < 8; i++)
1512 brw_MUL (&compile, brw_message_reg (2 + i), source[i], clip[i]);
1514 for (i = 0; i < 8; i++)
1515 brw_MOV (&compile, brw_message_reg (2 + i), source[i]);
1520 brw_push_insn_state (&compile);
1521 brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? */
1523 retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD),
1524 retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD));
1525 brw_pop_insn_state (&compile);
1527 brw_fb_WRITE (&compile,
1528 retype (vec16 (brw_acc_reg ()), BRW_REGISTER_TYPE_UW),
1530 retype (brw_vec8_grf (0), BRW_REGISTER_TYPE_UW),
1531 0, /* binding table index */
1532 2 + 8, /* msg length */
1533 0, /* response length */
1537 program = brw_get_program (&compile, &size);
1540 i965_stream_align (&device->general, 64);
1541 offset = i965_stream_emit (&device->general, program, size);
1543 cache = _cairo_freelist_alloc (&device->wm_kernel_freelist);
1544 if (likely (cache != NULL)) {
1545 i965_wm_kernel_init (cache, shader);
1546 cache->offset = offset;
1547 status = _cairo_hash_table_insert (device->wm_kernels, &cache->entry);
1548 if (unlikely (status))
1549 _cairo_freelist_free (&device->wm_kernel_freelist, cache);
1556 create_sf_kernel (i965_device_t *device,
1557 i965_shader_t *shader)
1559 struct brw_compile compile;
1560 const uint32_t *program;
1564 brw_compile_init (&compile, device->is_g4x);
1566 switch (shader->mask.type.vertex) {
1569 /* use curb plane eq in WM */
1574 /* just a constant opacity */
1576 brw_message4_reg (1),
1577 brw_vec4_grf (3, 0));
1582 /* an offset+sf into the glyph cache */
1585 brw_vec2_grf (3, 0));
1587 brw_message4_reg (1),
1588 negate (brw_vec2_grf (1, 4)),
1589 brw_imm_f (1./1024));
1594 brw_urb_WRITE (&compile,
1597 brw_vec8_grf (0), /* r0, will be copied to m0 */
1601 0, /* response len */
1603 1, /* writes complete */
1605 BRW_URB_SWIZZLE_NONE);
1607 program = brw_get_program (&compile, &size);
1609 i965_stream_align (&device->general, 64);
1610 return i965_stream_emit (&device->general, program, size);
1614 i965_sf_kernel (const i965_shader_t *shader)
1616 return shader->mask.type.vertex;
1620 i965_sf_state_init (struct i965_sf_state *key,
1621 const i965_shader_t *shader)
1623 key->entry.hash = i965_sf_kernel (shader);
1627 i965_sf_state_equal (const void *A, const void *B)
1629 const cairo_hash_entry_t *a = A, *b = B;
1630 return a->hash == b->hash;
1634 * Sets up the SF state pointing at an SF kernel.
1636 * The SF kernel does coord interp: for each attribute,
1637 * calculate dA/dx and dA/dy. Hand these interpolation coefficients
1638 * back to SF which then hands pixels off to WM.
1641 gen4_create_sf_state (i965_device_t *device,
1642 i965_shader_t *shader)
1644 struct brw_sf_unit_state *state;
1645 struct i965_sf_state key, *cache;
1646 cairo_status_t status;
1649 i965_sf_state_init (&key, shader);
1650 if (i965_sf_state_equal (&key, &device->sf_state))
1651 return device->sf_state.offset;
1653 cache = _cairo_hash_table_lookup (device->sf_states, &key.entry);
1654 if (cache != NULL) {
1655 offset = cache->offset;
1659 offset = create_sf_kernel (device, shader);
1661 state = i965_stream_alloc (&device->general, 32, sizeof (*state));
1662 memset (state, 0, sizeof (*state));
1664 state->thread0.grf_reg_count = BRW_GRF_BLOCKS (3);
1665 assert ((offset & 63) == 0);
1666 state->thread0.kernel_start_pointer = offset >> 6;
1667 state->sf1.single_program_flow = 1;
1668 state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
1669 state->thread3.urb_entry_read_offset = 1;
1670 state->thread3.dispatch_grf_start_reg = 3;
1671 state->thread4.max_threads = SF_MAX_THREADS - 1;
1672 state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
1673 state->thread4.nr_urb_entries = URB_SF_ENTRIES;
1674 state->sf6.dest_org_vbias = 0x8;
1675 state->sf6.dest_org_hbias = 0x8;
1677 offset = i965_stream_offsetof (&device->general, state);
1679 cache = _cairo_freelist_alloc (&device->sf_freelist);
1680 if (likely (cache != NULL)) {
1681 i965_sf_state_init (cache, shader);
1682 cache->offset = offset;
1683 status = _cairo_hash_table_insert (device->sf_states, &cache->entry);
1684 if (unlikely (status))
1685 _cairo_freelist_free (&device->sf_freelist, cache);
1689 i965_sf_state_init (&device->sf_state, shader);
1690 device->sf_state.offset = offset;
1695 static unsigned long
1696 i965_shader_sampler_hash (const i965_shader_t *shader)
1698 unsigned long hash = 0;
1699 unsigned int offset = 0;
1701 if (shader->source.base.bo != NULL) {
1702 hash |= (shader->source.base.filter << offset) |
1703 (shader->source.base.extend << (offset + 4));
1707 if (shader->mask.base.bo != NULL) {
1708 hash |= (shader->mask.base.filter << offset) |
1709 (shader->mask.base.extend << (offset + 4));
1713 if (shader->clip.base.bo != NULL) {
1714 hash |= (shader->clip.base.filter << offset) |
1715 (shader->clip.base.extend << (offset + 4));
1719 if (shader->dst.base.bo != NULL) {
1720 hash |= (shader->dst.base.filter << offset) |
1721 (shader->dst.base.extend << (offset + 4));
1729 i965_sampler_init (struct i965_sampler *key,
1730 const i965_shader_t *shader)
1732 key->entry.hash = i965_shader_sampler_hash (shader);
1736 emit_sampler_channel (i965_device_t *device,
1737 const union i965_shader_channel *channel,
1738 uint32_t border_color)
1740 struct brw_sampler_state *state;
1742 state = i965_stream_alloc (&device->general, 0, sizeof (*state));
1743 memset (state, 0, sizeof (*state));
1745 state->ss0.lod_preclamp = 1; /* GL mode */
1747 state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;
1749 state->ss0.min_filter = channel->base.filter;
1750 state->ss0.mag_filter = channel->base.filter;
1752 state->ss1.r_wrap_mode = channel->base.extend;
1753 state->ss1.s_wrap_mode = channel->base.extend;
1754 state->ss1.t_wrap_mode = channel->base.extend;
1756 assert ((border_color & 31) == 0);
1757 state->ss2.border_color_pointer = border_color >> 5;
1761 emit_sampler_state_table (i965_device_t *device,
1762 i965_shader_t *shader)
1764 struct i965_sampler key, *cache;
1765 cairo_status_t status;
1768 if (device->border_color_offset == (uint32_t) -1) {
1769 struct brw_sampler_legacy_border_color *border_color;
1771 border_color = i965_stream_alloc (&device->general, 32,
1772 sizeof (*border_color));
1773 border_color->color[0] = 0; /* R */
1774 border_color->color[1] = 0; /* G */
1775 border_color->color[2] = 0; /* B */
1776 border_color->color[3] = 0; /* A */
1778 device->border_color_offset = i965_stream_offsetof (&device->general,
1781 i965_sampler_init (&key, shader);
1782 cache = _cairo_hash_table_lookup (device->samplers, &key.entry);
1784 return cache->offset;
1787 i965_stream_align (&device->general, 32);
1788 offset = device->general.used;
1789 if (shader->source.base.bo != NULL) {
1790 emit_sampler_channel (device,
1792 device->border_color_offset);
1794 if (shader->mask.base.bo != NULL) {
1795 emit_sampler_channel (device,
1797 device->border_color_offset);
1799 if (shader->clip.base.bo != NULL) {
1800 emit_sampler_channel (device,
1802 device->border_color_offset);
1804 if (shader->dst.base.bo != NULL) {
1805 emit_sampler_channel (device,
1807 device->border_color_offset);
1810 cache = _cairo_freelist_alloc (&device->sampler_freelist);
1811 if (likely (cache != NULL)) {
1812 i965_sampler_init (cache, shader);
1813 cache->offset = offset;
1814 status = _cairo_hash_table_insert (device->samplers, &cache->entry);
1815 if (unlikely (status))
1816 _cairo_freelist_free (&device->sampler_freelist, cache);
1823 i965_cc_state_init (struct i965_cc_state *key,
1824 const i965_shader_t *shader)
1826 uint32_t src_blend, dst_blend;
1828 if (shader->need_combine)
1829 src_blend = dst_blend = 0;
1831 i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend);
1833 key->entry.hash = src_blend | ((dst_blend & 0xffff) << 16);
1837 i965_cc_state_equal (const void *A, const void *B)
1839 const cairo_hash_entry_t *a = A, *b = B;
1840 return a->hash == b->hash;
1844 cc_state_emit (i965_device_t *device, i965_shader_t *shader)
1846 struct brw_cc_unit_state *state;
1847 struct i965_cc_state key, *cache;
1848 cairo_status_t status;
1849 uint32_t src_blend, dst_blend;
1852 i965_cc_state_init (&key, shader);
1853 if (i965_cc_state_equal (&key, &device->cc_state))
1854 return device->cc_state.offset;
1856 cache = _cairo_hash_table_lookup (device->cc_states, &key.entry);
1857 if (cache != NULL) {
1858 offset = cache->offset;
1862 if (shader->need_combine)
1863 src_blend = dst_blend = 0;
1865 i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend);
1867 state = i965_stream_alloc (&device->general, 64, sizeof (*state));
1868 memset (state, 0, sizeof (*state));
1870 /* XXX Note errata, need to flush render cache when blend_enable 0 -> 1 */
1871 /* XXX 2 source blend */
1872 state->cc3.blend_enable = ! shader->need_combine;
1873 state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
1874 state->cc5.ia_src_blend_factor = src_blend;
1875 state->cc5.ia_dest_blend_factor = dst_blend;
1876 state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
1877 state->cc6.clamp_post_alpha_blend = 1;
1878 state->cc6.clamp_pre_alpha_blend = 1;
1879 state->cc6.src_blend_factor = src_blend;
1880 state->cc6.dest_blend_factor = dst_blend;
1882 offset = i965_stream_offsetof (&device->general, state);
1884 cache = _cairo_freelist_alloc (&device->cc_freelist);
1885 if (likely (cache != NULL)) {
1886 i965_cc_state_init (cache, shader);
1887 cache->offset = offset;
1888 status = _cairo_hash_table_insert (device->cc_states, &cache->entry);
1889 if (unlikely (status))
1890 _cairo_freelist_free (&device->cc_freelist, cache);
1894 i965_cc_state_init (&device->cc_state, shader);
1895 device->cc_state.offset = offset;
1901 i965_wm_state_init (struct i965_wm_state *key,
1902 const i965_shader_t *shader)
1904 key->kernel = i965_wm_kernel_hash (shader);
1905 key->sampler = i965_shader_sampler_hash (shader);
1907 key->entry.hash = key->kernel ^ ((key->sampler) << 16 | (key->sampler >> 16));
1911 i965_wm_state_equal (const void *A, const void *B)
1913 const struct i965_wm_state *a = A, *b = B;
1915 if (a->entry.hash != b->entry.hash)
1918 return a->kernel == b->kernel && a->sampler == b->sampler;
1922 i965_shader_binding_table_count (i965_shader_t *shader)
1927 if (shader->source.type.fragment != FS_CONSTANT)
1929 switch (shader->mask.type.fragment) {
1940 if (shader->clip.type.fragment == FS_SURFACE)
1942 if (shader->dst.type.fragment == FS_SURFACE)
1949 gen4_create_wm_state (i965_device_t *device,
1950 i965_shader_t *shader)
1952 struct brw_wm_unit_state *state;
1956 struct i965_wm_state key, *cache;
1957 cairo_status_t status;
1960 i965_wm_state_init (&key, shader);
1961 if (i965_wm_state_equal (&key, &device->wm_state))
1962 return device->wm_state.offset;
1964 cache = _cairo_hash_table_lookup (device->wm_states, &key.entry);
1965 if (cache != NULL) {
1966 device->wm_state = *cache;
1967 return cache->offset;
1970 kernel = create_wm_kernel (device, shader, &num_reg);
1971 sampler = emit_sampler_state_table (device, shader);
1973 state = i965_stream_alloc (&device->general, 32, sizeof (*state));
1974 memset (state, 0, sizeof (*state));
1975 state->thread0.grf_reg_count = BRW_GRF_BLOCKS (num_reg);
1976 assert ((kernel & 63) == 0);
1977 state->thread0.kernel_start_pointer = kernel >> 6;
1979 state->thread3.dispatch_grf_start_reg = 2;
1981 state->wm4.sampler_count = 1; /* 1-4 samplers used */
1982 assert ((sampler & 31) == 0);
1983 state->wm4.sampler_state_pointer = sampler >> 5;
1985 state->wm5.max_threads = PS_MAX_THREADS_CTG - 1;
1987 state->wm5.max_threads = PS_MAX_THREADS_BRW - 1;
1988 state->wm5.thread_dispatch_enable = 1;
1990 if (device->is_g4x) {
1991 /* XXX contiguous 32 pixel dispatch */
1993 state->wm5.enable_16_pix = 1;
1994 /* 8 pixel dispatch and friends */
1995 //state->wm5.early_depth_test = 1;
1997 state->thread1.binding_table_entry_count = i965_shader_binding_table_count(shader);
1998 state->thread3.urb_entry_read_length = i965_shader_pue_length (shader);
1999 state->thread3.const_urb_entry_read_length = i965_shader_const_urb_length (shader);
2001 key.offset = i965_stream_offsetof (&device->general, state);
2003 cache = _cairo_freelist_alloc (&device->wm_state_freelist);
2004 if (likely (cache != NULL)) {
2006 status = _cairo_hash_table_insert (device->wm_states, &cache->entry);
2007 if (unlikely (status))
2008 _cairo_freelist_free (&device->wm_state_freelist, cache);
2011 device->wm_state = key;
2016 vs_unit_state_emit (i965_device_t *device)
2018 if (device->vs_offset == (uint32_t) -1) {
2019 struct brw_vs_unit_state *state;
2021 /* Set up the vertex shader to be disabled (passthrough) */
2022 state = i965_stream_alloc (&device->general, 32, sizeof (*state));
2023 memset (state, 0, sizeof (*state));
2025 state->thread4.nr_urb_entries = URB_VS_ENTRIES;
2026 state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
2027 state->vs6.vert_cache_disable = 1;
2029 device->vs_offset = i965_stream_offsetof (&device->general, state);
2032 return device->vs_offset;
2036 i965_get_card_format (cairo_format_t format)
2039 case CAIRO_FORMAT_ARGB32:
2040 return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
2041 case CAIRO_FORMAT_RGB24:
2042 return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
2043 case CAIRO_FORMAT_RGB16_565:
2044 return BRW_SURFACEFORMAT_B5G6R5_UNORM;
2045 case CAIRO_FORMAT_A8:
2046 return BRW_SURFACEFORMAT_A8_UNORM;
2047 case CAIRO_FORMAT_A1:
2048 case CAIRO_FORMAT_INVALID:
2056 i965_get_dest_format (cairo_format_t format)
2059 case CAIRO_FORMAT_ARGB32:
2060 case CAIRO_FORMAT_RGB24:
2061 return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
2062 case CAIRO_FORMAT_RGB16_565:
2063 return BRW_SURFACEFORMAT_B5G6R5_UNORM;
2064 case CAIRO_FORMAT_A8:
2065 return BRW_SURFACEFORMAT_A8_UNORM;
2066 case CAIRO_FORMAT_A1:
2067 case CAIRO_FORMAT_INVALID:
2074 /* XXX silly inline due to compiler bug... */
2076 i965_stream_add_pending_relocation (i965_stream_t *stream,
2077 uint32_t target_offset,
2078 uint32_t read_domains,
2079 uint32_t write_domain,
2084 n = stream->num_pending_relocations++;
2085 assert (n < stream->max_pending_relocations);
2087 stream->pending_relocations[n].offset = target_offset;
2088 stream->pending_relocations[n].read_domains = read_domains;
2089 stream->pending_relocations[n].write_domain = write_domain;
2090 stream->pending_relocations[n].delta = delta;
2094 emit_surface_state (i965_device_t *device,
2095 cairo_bool_t is_target,
2097 cairo_format_t format,
2098 int width, int height, int stride,
2101 struct brw_surface_state *state;
2102 uint32_t write_domain, read_domains;
2105 state = i965_stream_alloc (&device->surface, 32, sizeof (*state));
2106 memset (state, 0, sizeof (*state));
2108 state->ss0.surface_type = type;
2110 state->ss0.surface_format = i965_get_dest_format (format);
2112 state->ss0.surface_format = i965_get_card_format (format);
2114 state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
2115 state->ss0.color_blend = 1;
2116 if (is_target && device->is_g4x)
2117 state->ss0.render_cache_read_mode = 1;
2119 state->ss1.base_addr = bo->offset;
2121 state->ss2.height = height - 1;
2122 state->ss2.width = width - 1;
2123 state->ss3.pitch = stride - 1;
2124 state->ss3.tile_walk = bo->tiling == I915_TILING_Y;
2125 state->ss3.tiled_surface = bo->tiling != I915_TILING_NONE;
2128 read_domains = I915_GEM_DOMAIN_RENDER;
2129 write_domain = I915_GEM_DOMAIN_RENDER;
2131 read_domains = I915_GEM_DOMAIN_SAMPLER;
2135 offset = i965_stream_offsetof (&device->surface, state);
2136 i965_emit_relocation (device, &device->surface,
2138 read_domains, write_domain,
2139 offset + offsetof (struct brw_surface_state, ss1.base_addr));
2144 emit_surface_state_for_shader (i965_device_t *device,
2145 const union i965_shader_channel *channel)
2147 int type = BRW_SURFACE_2D;
2149 assert (channel->type.fragment != FS_NONE);
2150 assert (channel->type.fragment != FS_CONSTANT);
2152 if (channel->type.fragment != FS_SURFACE)
2153 type = BRW_SURFACE_1D;
2155 return emit_surface_state (device, FALSE,
2157 channel->base.format,
2158 channel->base.width,
2159 channel->base.height,
2160 channel->base.stride,
2165 i965_wm_binding_equal (const void *A,
2168 const struct i965_wm_binding *a = A, *b = B;
2170 if (a->entry.hash != b->entry.hash)
2173 if (a->size != b->size)
2176 return memcmp (a->table, b->table, sizeof (uint32_t) * a->size) == 0;
2180 i965_wm_binding_init (struct i965_wm_binding *state,
2181 const uint32_t *table,
2186 state->entry.hash = size;
2189 for (n = 0; n < size; n++) {
2190 state->table[n] = table[n];
2191 state->entry.hash ^= (table[n] << (8 * n)) |
2192 (table[n] >> (32 - (8*n)));
2197 emit_binding_table (i965_device_t *device,
2198 i965_shader_t *shader)
2201 struct i965_wm_binding key, *cache;
2205 table = i965_stream_alloc (&device->surface, 32, 5 * sizeof (uint32_t));
2206 if (shader->target->stream != device->surface.serial) {
2207 shader->target->stream = device->surface.serial;
2208 shader->target->offset = emit_surface_state (device,
2210 to_intel_bo (shader->target->intel.drm.bo),
2211 shader->target->intel.drm.format,
2212 shader->target->intel.drm.width,
2213 shader->target->intel.drm.height,
2214 shader->target->intel.drm.stride,
2217 table[n++] = shader->target->offset;
2219 bo = shader->source.base.bo;
2221 if (bo->opaque0 != device->surface.serial) {
2222 bo->opaque0 = device->surface.serial;
2223 bo->opaque1 = emit_surface_state_for_shader (device, &shader->source);
2225 table[n++] = bo->opaque1;
2228 bo = shader->mask.base.bo;
2230 if (bo->opaque0 != device->surface.serial) {
2231 bo->opaque0 = device->surface.serial;
2232 bo->opaque1 = emit_surface_state_for_shader (device, &shader->mask);
2234 table[n++] = bo->opaque1;
2237 bo = shader->clip.base.bo;
2239 if (bo->opaque0 != device->surface.serial) {
2240 bo->opaque0 = device->surface.serial;
2241 bo->opaque1 = emit_surface_state_for_shader (device, &shader->clip);
2243 table[n++] = bo->opaque1;
2246 bo = shader->dst.base.bo;
2248 if (bo->opaque0 != device->surface.serial) {
2249 bo->opaque0 = device->surface.serial;
2250 bo->opaque1 = emit_surface_state_for_shader (device, &shader->dst);
2252 table[n++] = bo->opaque1;
2255 i965_wm_binding_init (&key, table, n);
2256 key.offset = i965_stream_offsetof (&device->surface, table);
2258 if (i965_wm_binding_equal (&key, &device->wm_binding)) {
2259 device->surface.used = key.offset;
2260 return device->wm_binding.offset;
2263 cache = _cairo_hash_table_lookup (device->wm_bindings, &key.entry);
2264 if (cache != NULL) {
2265 device->surface.used = key.offset;
2266 key.offset = cache->offset;
2269 device->wm_binding = key;
2274 i965_emit_invariants (i965_device_t *device)
2276 OUT_BATCH (BRW_CS_URB_STATE | 0);
2277 OUT_BATCH (((URB_CS_ENTRY_SIZE-1) << 4) | (URB_CS_ENTRIES << 0));
2281 i965_emit_urb_fences (i965_device_t *device)
2283 int urb_vs_start, urb_vs_size;
2284 int urb_gs_start, urb_gs_size;
2285 int urb_clip_start, urb_clip_size;
2286 int urb_sf_start, urb_sf_size;
2287 int urb_cs_start, urb_cs_size;
2289 if (device->have_urb_fences)
2294 urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
2295 urb_gs_start = urb_vs_start + urb_vs_size;
2296 urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
2297 urb_clip_start = urb_gs_start + urb_gs_size;
2298 urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
2299 urb_sf_start = urb_clip_start + urb_clip_size;
2300 urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
2301 urb_cs_start = urb_sf_start + urb_sf_size;
2302 urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
2304 /* erratum: URB_FENCE must not cross a 64-byte cache-line */
2305 while ((device->batch.used & 63) > 64-12)
2306 OUT_BATCH (MI_NOOP);
2307 OUT_BATCH (BRW_URB_FENCE |
2314 OUT_BATCH (((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
2315 ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
2316 ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
2317 OUT_BATCH (((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
2318 ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
2320 device->have_urb_fences = TRUE;
2321 device->constants_size = 0;
2325 i965_emit_base (i965_device_t *device)
2327 OUT_BATCH (BRW_STATE_BASE_ADDRESS | 4);
2328 if (likely (device->general.num_pending_relocations == 0)) {
2329 i965_stream_add_pending_relocation (&device->general,
2331 I915_GEM_DOMAIN_INSTRUCTION, 0,
2332 BASE_ADDRESS_MODIFY);
2334 OUT_BATCH (0); /* pending relocation */
2336 if (likely (device->surface.num_pending_relocations == 0)) {
2337 i965_stream_add_pending_relocation (&device->surface,
2339 I915_GEM_DOMAIN_INSTRUCTION, 0,
2340 BASE_ADDRESS_MODIFY);
2342 OUT_BATCH (0); /* pending relocation */
2344 OUT_BATCH (0 | BASE_ADDRESS_MODIFY);
2345 /* general state max addr, disabled */
2346 OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY);
2347 /* media object state max addr, disabled */
2348 OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY);
2352 i965_emit_vertex_element (i965_device_t *device,
2353 i965_shader_t *shader)
2361 if (shader->mask.type.vertex == VS_SPANS ||
2362 shader->mask.type.vertex == VS_GLYPHS)
2364 type = shader->mask.type.vertex;
2368 if (type == device->vertex_type)
2370 device->vertex_type = type;
2374 OUT_BATCH (BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1));
2375 OUT_BATCH ((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2377 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2378 (offset << VE0_OFFSET_SHIFT));
2379 OUT_BATCH ((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2380 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2381 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
2382 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
2383 (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
2386 assert (shader->source.type.vertex == VS_NONE);
2387 switch (shader->mask.type.vertex) {
2393 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2395 (BRW_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT) |
2396 (offset << VE0_OFFSET_SHIFT));
2397 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2398 (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_1_SHIFT) |
2399 (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
2400 (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
2401 (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
2407 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2409 (BRW_SURFACEFORMAT_R16G16_FLOAT << VE0_FORMAT_SHIFT) |
2410 (offset << VE0_OFFSET_SHIFT));
2411 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2412 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2413 (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
2414 (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
2415 (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
2420 assert (shader->clip.type.vertex == VS_NONE);
2421 assert (shader->dst.type.vertex == VS_NONE);
2423 device->vertex_size = offset;
2424 i965_stream_align (&device->vertex, device->vertex_size);
2425 device->vertex.committed = device->vertex.used;
2427 device->rectangle_size = 3 * offset;
2431 i965_shader_needs_surface_update (const i965_shader_t *shader,
2432 const i965_device_t *device)
2434 return device->target != shader->target || shader->target->stream == 0 ||
2435 (shader->source.base.bo != NULL && device->source != shader->source.base.bo) ||
2436 (shader->mask.base.bo != NULL && device->mask != shader->mask.base.bo) ||
2437 (shader->clip.base.bo != NULL && device->clip != shader->clip.base.bo);
2441 i965_shader_needs_constants_update (const i965_shader_t *shader,
2442 const i965_device_t *device)
2444 if (shader->constants_size == 0)
2447 if (device->constants_size != shader->constants_size)
2450 return memcmp (device->constants,
2452 sizeof (float) * shader->constants_size);
2456 i965_shader_needs_state_update (const i965_shader_t *shader,
2457 const i965_device_t *device)
2460 struct i965_sf_state sf;
2461 struct i965_wm_state wm;
2462 struct i965_cc_state cc;
2465 i965_sf_state_init (&state.sf, shader);
2466 if (! i965_sf_state_equal (&state.sf, &device->sf_state))
2469 i965_wm_state_init (&state.wm, shader);
2470 if (! i965_wm_state_equal (&state.wm, &device->wm_state))
2473 i965_cc_state_init (&state.cc, shader);
2474 if (! i965_cc_state_equal (&state.cc, &device->cc_state))
/* Emit into the batch whatever GPU state @shader requires that differs
 * from the state currently cached on @device: binding table, drawing
 * rectangle, pipelined unit pointers, URB fences, push constants and
 * vertex elements.  Several lines (braces, early returns, a few OUT_BATCH
 * arguments) are elided in this excerpt. */
2481 i965_emit_composite (i965_device_t *device,
2482 i965_shader_t *shader)
2484 uint32_t draw_rectangle;
2486 if (i965_shader_needs_surface_update (shader, device)) {
/* Upload fresh SURFACE_STATE entries and get the binding table offset. */
2489 offset = emit_binding_table (device, shader);
2491 /* Only the PS uses the binding table */
2492 OUT_BATCH (BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
2493 OUT_BATCH (0); /* vs */
2494 OUT_BATCH (0); /* gs */
2495 OUT_BATCH (0); /* clip */
2496 OUT_BATCH (0); /* sf */
/* Remember what is now bound so the next call can skip this work. */
2499 device->target = shader->target;
2500 device->source = shader->source.base.bo;
2501 device->mask = shader->mask.base.bo;
2502 device->clip = shader->clip.base.bo;
2505 /* The drawing rectangle clipping is always on. Set it to values that
2506 * shouldn't do any clipping.
2508 draw_rectangle = DRAW_YMAX (shader->target->intel.drm.height) |
2509 DRAW_XMAX (shader->target->intel.drm.width);
/* Only re-emit the rectangle when it actually changed. */
2510 if (draw_rectangle != device->draw_rectangle) {
2511 OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2);
2512 OUT_BATCH (0x00000000); /* ymin, xmin */
2513 OUT_BATCH (draw_rectangle);
2514 OUT_BATCH (0x00000000); /* yorigin, xorigin */
2515 device->draw_rectangle = draw_rectangle;
2518 /* skip the depth buffer */
2519 /* skip the polygon stipple */
2520 /* skip the polygon stipple offset */
2521 /* skip the line stipple */
2523 /* Set the pointers to the 3d pipeline state */
2524 if (i965_shader_needs_state_update (shader, device)) {
2525 OUT_BATCH (BRW_3DSTATE_PIPELINED_POINTERS | 5);
2526 OUT_BATCH (vs_unit_state_emit (device));
2527 OUT_BATCH (BRW_GS_DISABLE);
2528 OUT_BATCH (BRW_CLIP_DISABLE);
2529 OUT_BATCH (gen4_create_sf_state (device, shader));
2530 OUT_BATCH (gen4_create_wm_state (device, shader));
2531 OUT_BATCH (cc_state_emit (device, shader));
2533 /* Once the units are initialized, we need to setup the fences */
2534 i965_emit_urb_fences (device);
2537 if (i965_shader_needs_constants_update (shader, device)) {
/* Round the constant payload up to a 64-byte (cache line) multiple. */
2538 uint32_t size = (sizeof (float) * shader->constants_size + 63) & -64;
2540 /* XXX reuse clear/black/white
2544 /* XXX CONSTANT_BUFFER Address Offset Disable? INSTPM? */
2546 assert (size <= 64 * URB_CS_ENTRY_SIZE);
2547 assert (((sizeof (float) * shader->constants_size + 31) & -32) == 32 * i965_shader_const_urb_length (shader));
/* Stash the constants in the surface stream and cache them on the
 * device for the next comparison. */
2549 device->constants = i965_stream_alloc (&device->surface, 64, size);
2550 memcpy (device->constants, shader->constants, size);
2551 device->constants_size = shader->constants_size;
/* Valid bit (1 << 8) plus buffer offset with length in 64B units - 1. */
2553 OUT_BATCH (BRW_CONSTANT_BUFFER | (1 << 8));
2554 OUT_BATCH (i965_stream_offsetof (&device->surface, device->constants) + size / 64 - 1);
/* Finally describe the vertex layout for this shader. */
2557 i965_emit_vertex_element (device, shader);
/* Issue a 3DPRIMITIVE for the vertices accumulated in the device's
 * vertex stream since the last flush, (re)emitting the vertex buffer
 * packet first if the vertex stride changed.  No-op when nothing new has
 * been queued.  NOTE(review): braces, early return and some OUT_BATCH
 * argument lines are elided in this excerpt. */
2561 i965_flush_vertices (i965_device_t *device)
2563 int vertex_count, vertex_start;
/* Nothing queued since the last flush. */
2565 if (device->vertex.used == device->vertex.committed)
2568 assert (device->vertex.used > device->vertex.committed);
/* Convert byte positions in the stream into vertex indices. */
2570 vertex_start = device->vertex.committed / device->vertex_size;
2572 (device->vertex.used - device->vertex.committed) / device->vertex_size;
2574 assert (vertex_count);
/* The VERTEX_BUFFERS packet encodes the stride, so it must be re-emitted
 * whenever the stride differs from the last one programmed. */
2576 if (device->vertex_size != device->last_vertex_size) {
/* The buffer address is filled in at exec time via a pending relocation
 * pointing at the OUT_BATCH(0) slot below. */
2577 i965_stream_add_pending_relocation (&device->vertex,
2578 device->batch.used + 8,
2579 I915_GEM_DOMAIN_VERTEX, 0,
2582 OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3);
2583 OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) |
2585 (device->vertex_size << VB0_BUFFER_PITCH_SHIFT));
2586 OUT_BATCH (0); /* pending relocation */
2589 device->last_vertex_size = device->vertex_size;
/* Draw the queued vertices as a rectangle list. */
2592 OUT_BATCH (BRW_3DPRIMITIVE |
2593 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
2594 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
2597 OUT_BATCH (vertex_count); /* vertex count per instance */
2598 OUT_BATCH (vertex_start); /* start vertex offset */
2599 OUT_BATCH (1); /* single instance */
/* Mark everything up to here as submitted. */
2603 device->vertex.committed = device->vertex.used;
/* Flush any queued vertices and commit the vertex stream to its bo.  If
 * the aperture can no longer accommodate the shader's buffers, flush the
 * whole device and re-commit the shader; an unrecoverable flush error is
 * propagated non-locally via longjmp to the shader's unwind point. */
2607 i965_finish_vertices (i965_device_t *device)
2609 cairo_status_t status;
2611 i965_flush_vertices (device);
2613 i965_stream_commit (device, &device->vertex);
2615 if (! i965_shader_check_aperture (device->shader, device)) {
2616 status = i965_device_flush (device);
2617 if (unlikely (status))
2618 longjmp (device->shader->unwind, status);
/* After a full flush the shader state must be re-emitted; this cannot
 * fail here as the batch was just emptied. */
2620 status = i965_shader_commit (device->shader, device);
2621 assert (status == CAIRO_STATUS_SUCCESS);
/* Force the next flush to re-emit the VERTEX_BUFFERS packet. */
2624 device->last_vertex_size = 0;
/* Report whether emitting @shader requires any state update on @device:
 * surfaces, push constants, or fixed-function unit state.
 * NOTE(review): the `return TRUE;` lines after the first two checks are
 * elided in this excerpt. */
2628 i965_shader_needs_update (const i965_shader_t *shader,
2629 const i965_device_t *device)
2631 if (i965_shader_needs_surface_update (shader, device))
2634 if (i965_shader_needs_constants_update (shader, device))
2637 return i965_shader_needs_state_update (shader, device);
/* Simplify the shader operator where the result is identical: OVER with
 * a source that carries no alpha behaves exactly like SOURCE, which
 * needs no blending.  The kernel-hash test presumably restricts this to
 * simple kernels — TODO confirm against i965_wm_kernel_hash. */
2641 i965_shader_reduce (i965_shader_t *shader,
2642 const i965_device_t *device)
2644 if (shader->op == CAIRO_OPERATOR_OVER &&
2645 (i965_wm_kernel_hash (shader) & ~0xff) == 0 &&
2646 (shader->source.base.content & CAIRO_CONTENT_ALPHA) == 0)
2648 shader->op = CAIRO_OPERATOR_SOURCE;
/* Bind @shader to @device: on first commit, resolve the destination,
 * upload constants, reduce the operator and arm the setjmp unwind point
 * used by deeper emission code; then, whenever any cached state is
 * stale, make room in the batch/surface/general streams and emit the
 * composite state.  Returns CAIRO_STATUS_SUCCESS or a flush error.
 * NOTE(review): some braces/returns are elided in this excerpt. */
2653 i965_shader_commit (i965_shader_t *shader,
2654 i965_device_t *device)
2656 cairo_status_t status;
2658 if (! shader->committed) {
2659 device->shader = shader;
2661 status = i965_shader_setup_dst (shader);
2662 if (unlikely (status))
2665 i965_shader_setup_constants (shader);
2666 i965_shader_reduce (shader, device);
/* Arm the non-local error path: emission helpers longjmp back here
 * with the failing status. */
2668 if ((status = setjmp (shader->unwind)))
2671 shader->committed = TRUE;
/* Fast path: all cached device state already matches this shader. */
2674 if (! i965_shader_needs_update (shader, device))
2675 return CAIRO_STATUS_SUCCESS;
2677 /* XXX too many guestimates about likely maximum sizes */
/* Ensure headroom in the batch and aperture; a full device flush resets
 * all cached state. */
2679 if (device->batch.used + 128 > device->batch.size ||
2680 ! i965_shader_check_aperture (shader, device))
2682 status = i965_device_flush (device);
2683 if (unlikely (status))
2684 longjmp (shader->unwind, status);
/* Submit any vertices queued under the previous state before changing it. */
2687 i965_flush_vertices (device);
/* Make room in the surface-state stream (bytes and relocation slots). */
2689 if (unlikely (device->surface.used + 128 > device->surface.size ||
2690 device->surface.num_relocations + 4 > device->surface.max_relocations))
2692 i965_stream_commit (device, &device->surface);
/* Likewise for the general-state stream; its cached offsets must be reset. */
2696 if (unlikely (device->general.used + 512 > device->general.size)) {
2697 i965_stream_commit (device, &device->general);
2698 i965_general_state_reset (device);
/* A fresh batch needs the invariant pipeline setup first. */
2702 if (unlikely (device->batch.used == 0))
2703 i965_emit_invariants (device);
/* (Re)emit STATE_BASE_ADDRESS if either stream has no pending base reloc. */
2705 if (unlikely (device->surface.num_pending_relocations == 0 ||
2706 device->general.num_pending_relocations == 0))
2708 i965_emit_base (device);
2711 i965_emit_composite (device, shader);
2713 return CAIRO_STATUS_SUCCESS;
/* Replay the vertices in @vbo once per rectangle of @clip_region, using
 * the hardware drawing rectangle to perform the clipping.  Two paths are
 * visible: one that binds each vbo in the chain directly as a vertex
 * buffer, and one that copies a small vbo's contents into the device's
 * vertex stream and replays from there.  NOTE(review): this function
 * runs past the end of the excerpt and several lines (braces, the
 * do/while opening, some OUT_BATCH arguments) are elided. */
2717 i965_clipped_vertices (i965_device_t *device,
2718 struct i965_vbo *vbo,
2719 cairo_region_t *clip_region)
2721 int i, num_rectangles, size;
2722 cairo_status_t status;
/* Nothing to draw. */
2724 if (vbo->count == 0)
2727 num_rectangles = cairo_region_num_rectangles (clip_region);
2728 assert (num_rectangles);
/* If the vbo cannot fit into the current vertex stream, take the
 * direct-binding path below (condition head elided in this excerpt). */
2731 vbo->count * device->vertex_size + device->vertex.used > device->vertex.size)
2733 i965_finish_vertices (device);
2735 size = device->rectangle_size;
2737 for (i = 0; i < num_rectangles; i++) {
2738 cairo_rectangle_int_t rect;
2740 cairo_region_get_rectangle (clip_region, i, &rect);
/* Ensure room in vertex stream, batch and aperture; on overflow flush
 * the device and re-commit the shader (cannot fail on empty batch). */
2742 if (unlikely (device->vertex.used + size > device->vertex.size ||
2743 device->batch.used + 64 > device->batch.size ||
2744 ! i965_shader_check_aperture (device->shader, device)))
2746 status = i965_device_flush (device);
2747 if (unlikely (status))
2748 longjmp (device->shader->unwind, status);
2750 status = i965_shader_commit (device->shader, device);
2751 assert (status == CAIRO_STATUS_SUCCESS);
/* Bind this vbo directly as vertex buffer 0; the address slot is
 * patched through a batch relocation. */
2754 i965_emit_relocation (device, &device->batch,
2756 I915_GEM_DOMAIN_VERTEX, 0,
2757 device->batch.used + 8);
2759 OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3);
2760 OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) |
2762 (device->vertex_size << VB0_BUFFER_PITCH_SHIFT));
2763 OUT_BATCH (vbo->bo->offset);
/* Clip by programming the drawing rectangle to this clip rect. */
2768 OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2);
2769 OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x));
2770 OUT_BATCH (DRAW_YMAX (rect.y + rect.height) |
2771 DRAW_XMAX (rect.x + rect.width));
2772 OUT_BATCH (0x00000000); /* yorigin, xorigin */
/* Redraw the whole vbo; the hardware discards what falls outside. */
2774 OUT_BATCH (BRW_3DPRIMITIVE |
2775 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
2776 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
2779 OUT_BATCH (vbo->count); /* vertex count per instance */
2780 OUT_BATCH (0); /* start vertex offset */
2781 OUT_BATCH (1); /* single instance */
2785 } while ((vbo = vbo->next) != NULL);
2786 assert (device->last_vertex_size == 0);
/* Alternate path: copy the vbo's vertices into the device vertex stream
 * and replay them per clip rectangle from there. */
2788 int vertex_start, vertex_count;
2791 vertex_start = device->vertex.committed / device->vertex_size;
2792 vertex_count = vbo->count;
2794 size = vertex_count * device->vertex_size;
/* Map the bo and append its contents to the in-flight vertex stream.
 * NOTE(review): no visible NULL check or unmap here — confirm in full
 * source. */
2795 ptr = intel_bo_map (&device->intel, vbo->bo);
2796 memcpy (device->vertex.data + device->vertex.used, ptr, size);
2797 device->vertex.committed = device->vertex.used += size;
2799 for (i = 0; i < num_rectangles; i++) {
2800 cairo_rectangle_int_t rect;
2802 cairo_region_get_rectangle (clip_region, i, &rect);
/* Same clip-and-redraw sequence as above, from the shared stream. */
2805 OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2);
2806 OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x));
2807 OUT_BATCH (DRAW_YMAX (rect.y + rect.height) |
2808 DRAW_XMAX (rect.x + rect.width));
2809 OUT_BATCH (0x00000000); /* yorigin, xorigin */
2811 OUT_BATCH (BRW_3DPRIMITIVE |
2812 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
2813 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
2816 OUT_BATCH (vertex_count); /* vertex count per instance */
2817 OUT_BATCH (vertex_start); /* start vertex offset */
2818 OUT_BATCH (1); /* single instance */
/* Invalidate the cached drawing rectangle so the next unclipped
 * composite re-emits it. */
2824 device->draw_rectangle = 0;