2 * Copyright © 2010-2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Chris Wilson <chris@chris-wilson.co.uk>
33 #include "sna_render.h"
34 #include "sna_render_inline.h"
36 #include "sna_video.h"
38 #include "gen3_render.h"
40 #define NO_COMPOSITE 0
41 #define NO_COMPOSITE_SPANS 0
43 #define NO_COPY_BOXES 0
46 #define NO_FILL_BOXES 0
48 #define PREFER_BLT_FILL 1
62 #define MAX_3D_SIZE 2048
63 #define MAX_3D_PITCH 8192
65 #define OUT_BATCH(v) batch_emit(sna, v)
66 #define OUT_BATCH_F(v) batch_emit_float(sna, v)
67 #define OUT_VERTEX(v) vertex_emit(sna, v)
69 enum gen3_radial_mode {
/* Blend-factor table indexed by Render PictOp (PictOpClear..PictOpAdd).
 * The struct's field declarations are elided in this view; from the uses
 * below (gen3_blend_op[op].dst_alpha / .src_alpha / .src_blend / .dst_blend)
 * the entries appear to be {dst_alpha, src_alpha, src_blend, dst_blend}
 * -- NOTE(review): confirm field order against the full file. */
74 static const struct blendinfo {
80 /* Clear */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
81 /* Src */ {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
82 /* Dst */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
83 /* Over */ {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
84 /* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
85 /* In */ {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
86 /* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
87 /* Out */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
88 /* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
89 /* Atop */ {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
90 /* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
91 /* Xor */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
92 /* Add */ {0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
/* S6 state word for "no blending": color writes enabled, ADD blend func
 * with ONE/ZERO factors, i.e. the source replaces the destination.  Used
 * both for PictOpClear/PictOpSrc and as the invariant default (see
 * gen3_emit_invariant below). */
95 #define S6_COLOR_WRITE_ONLY \
96 (S6_COLOR_WRITE_ENABLE | \
97 BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | \
98 BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | \
99 BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT)
/* Map of Render picture formats to gen3 texture-sampler surface formats.
 * fmt is the PICT_* format; xfmt is an alternate alpha-less format that
 * maps to the same hardware layout (0 when there is none).  The third
 * field is the MAPSURF/MT hardware format; the trailing bool appears to
 * flag red/blue channel reversal relative to the hardware layout (e.g.
 * PICT_b5g6r5 sampled as RGB565) -- NOTE(review): field names are elided
 * here, confirm against the struct declaration in the full file. */
101 static const struct formatinfo {
102 unsigned int fmt, xfmt;
105 } gen3_tex_formats[] = {
106 {PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
107 {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
108 {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
109 {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
110 {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
111 {PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
112 {PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
113 {PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
114 {PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
115 {PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
116 {PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
117 {PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
118 {PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
121 #define xFixedToDouble(f) pixman_fixed_to_double(f)
/* True when either dimension exceeds the gen3 3D pipeline's 2048-texel
 * limit (MAX_3D_SIZE), forcing a fallback/tiling path. */
123 static inline bool too_large(int width, int height)
125 return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
/* Translate an I915_TILING_* mode into _3DSTATE_BUF_INFO tiling bits.
 * The Y case deliberately falls through to X: Y-tiling sets the walk bit
 * and also needs BUF_3D_TILED_SURFACE. */
128 static inline uint32_t gen3_buf_tiling(uint32_t tiling)
132 case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
133 case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
134 case I915_TILING_NONE: break;
/* The 3D pipe can only render to surfaces with pitch <= MAX_3D_PITCH. */
140 gen3_check_pitch_3d(struct kgem_bo *bo)
142 return bo->pitch <= MAX_3D_PITCH;
/* Build the S6 blend-control dword for a Render op.
 *
 * Looks up the raw src/dst blend factors for @op in gen3_blend_op[] and
 * then patches them for two hardware quirks: (a) destinations without an
 * alpha channel must treat dst.A as 1.0, and (b) A8 destinations are
 * rendered through the green channel, so DST_ALPHA factors must become
 * DST_COLR factors.  Component-alpha rendering similarly swaps SRC_ALPHA
 * dst factors for SRC_COLR.  Clear/Src need no blending at all. */
145 static uint32_t gen3_get_blend_cntl(int op,
146 bool has_component_alpha,
149 uint32_t sblend = gen3_blend_op[op].src_blend;
150 uint32_t dblend = gen3_blend_op[op].dst_blend;
152 if (op <= PictOpSrc) /* for clear and src disable blending */
153 return S6_COLOR_WRITE_ONLY;
155 /* If there's no dst alpha channel, adjust the blend op so that we'll
156 * treat it as always 1.
158 if (gen3_blend_op[op].dst_alpha) {
159 if (PICT_FORMAT_A(dst_format) == 0) {
160 if (sblend == BLENDFACT_DST_ALPHA)
161 sblend = BLENDFACT_ONE;
162 else if (sblend == BLENDFACT_INV_DST_ALPHA)
163 sblend = BLENDFACT_ZERO;
166 /* gen3 engine reads 8bit color buffer into green channel
167 * in cases like color buffer blending etc., and also writes
168 * back green channel. So with dst_alpha blend we should use
169 * color factor. See spec on "8-bit rendering".
171 if (dst_format == PICT_a8) {
172 if (sblend == BLENDFACT_DST_ALPHA)
173 sblend = BLENDFACT_DST_COLR;
174 else if (sblend == BLENDFACT_INV_DST_ALPHA)
175 sblend = BLENDFACT_INV_DST_COLR;
179 /* If the source alpha is being used, then we should only be in a case
180 * where the source blend factor is 0, and the source blend value is the
181 * mask channels multiplied by the source picture's alpha.
183 if (has_component_alpha && gen3_blend_op[op].src_alpha) {
184 if (dblend == BLENDFACT_SRC_ALPHA)
185 dblend = BLENDFACT_SRC_COLR;
186 else if (dblend == BLENDFACT_INV_SRC_ALPHA)
187 dblend = BLENDFACT_INV_SRC_COLR;
/* Assemble the final S6 word: blending on, writes on, ADD function with
 * the (possibly patched) factors. */
190 return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
191 BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
192 sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
193 dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
/* Whether the 3D pipe can render directly to @format (switch body mostly
 * elided in this view; the 10bpc formats shown are among the accepted
 * cases). */
196 static bool gen3_check_dst_format(uint32_t format)
209 case PICT_a2r10g10b10:
210 case PICT_x2r10g10b10:
211 case PICT_a2b10g10r10:
212 case PICT_x2b10g10r10:
/* True for destination formats whose red/blue channels are swapped
 * relative to the hardware's native layout, so the shader must emit a
 * ZYXW swizzle on output. */
224 static bool gen3_dst_rb_reversed(uint32_t format)
232 case PICT_a2r10g10b10:
233 case PICT_x2r10g10b10:
/* Half-texel sampling bias applied to the destination origin. */
243 #define DSTORG_HORT_BIAS(x) ((x)<<20)
244 #define DSTORG_VERT_BIAS(x) ((x)<<16)
/* Map a Render format to the _3DSTATE_DST_BUF_VARS color-buffer format,
 * always combined with the half-texel origin BIAS. */
246 static uint32_t gen3_get_dst_format(uint32_t format)
248 #define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
255 return BIAS | COLR_BUF_ARGB8888;
258 return BIAS | COLR_BUF_RGB565;
263 return BIAS | COLR_BUF_ARGB1555;
264 case PICT_a2r10g10b10:
265 case PICT_x2r10g10b10:
266 case PICT_a2b10g10r10:
267 case PICT_x2b10g10r10:
268 return BIAS | COLR_BUF_ARGB2AAA;
270 return BIAS | COLR_BUF_8BIT;
275 return BIAS | COLR_BUF_ARGB4444;
/* Whether picture @p has a format the sampler can read (case list mostly
 * elided here). */
280 static bool gen3_check_format(PicturePtr p)
288 case PICT_a2r10g10b10:
289 case PICT_a2b10g10r10:
/* Stricter format check -- NOTE(review): exact distinction from
 * gen3_check_format is elided in this view; presumably formats usable
 * without shader fixups. */
302 static bool gen3_check_xformat(PicturePtr p)
315 case PICT_a2r10g10b10:
316 case PICT_x2r10g10b10:
317 case PICT_a2b10g10r10:
318 case PICT_x2b10g10r10:
/* Map a Render repeat mode to SS3 texture-coordinate addressing bits,
 * applying the same mode to both X and Y. */
330 static uint32_t gen3_texture_repeat(uint32_t repeat)
333 (SS3_NORMALIZED_COORDS | \
334 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
335 TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT)
339 return REPEAT(CLAMP_BORDER);
343 return REPEAT(CLAMP_EDGE);
345 return REPEAT(MIRROR);
/* Gradient variant: the mode only applies to X, Y always wraps (the
 * gradient texture is 1D and addressed along X). */
350 static uint32_t gen3_gradient_repeat(uint32_t repeat)
353 (SS3_NORMALIZED_COORDS | \
354 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
355 TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT)
359 return REPEAT(CLAMP_BORDER);
363 return REPEAT(CLAMP_EDGE);
365 return REPEAT(MIRROR);
/* Whether @p's repeatType is one the hardware supports. */
370 static bool gen3_check_repeat(PicturePtr p)
375 switch (p->repeatType) {
/* Map a Render filter to SS2 sampler min/mag filter bits; mipmapping is
 * never used. */
386 static uint32_t gen3_filter(uint32_t filter)
391 case PictFilterNearest:
392 return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
393 FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
394 MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
395 case PictFilterBilinear:
396 return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT |
397 FILTER_LINEAR << SS2_MIN_FILTER_SHIFT |
398 MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
/* Only Nearest/Bilinear are accelerated; anything else falls back. */
402 static bool gen3_check_filter(PicturePtr p)
405 case PictFilterNearest:
406 case PictFilterBilinear:
/* Emit one (x, y) destination vertex into the vertex buffer. */
414 gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY)
/* Emit a rectangle for a solid (constant-color) source: three vertices
 * in RECTLIST order -- bottom-right, bottom-left, top-left -- with no
 * texture coordinates. */
421 gen3_emit_composite_primitive_constant(struct sna *sna,
422 const struct sna_composite_op *op,
423 const struct sna_composite_rectangles *r)
425 int16_t dst_x = r->dst.x + op->dst.x;
426 int16_t dst_y = r->dst.y + op->dst.y;
428 gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
429 gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
430 gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
/* Rectangle for a gradient source with an identity transform: each
 * vertex carries the untransformed (src_x, src_y) as the gradient
 * texture coordinate. */
434 gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
435 const struct sna_composite_op *op,
436 const struct sna_composite_rectangles *r)
438 int16_t dst_x, dst_y;
439 int16_t src_x, src_y;
441 dst_x = r->dst.x + op->dst.x;
442 dst_y = r->dst.y + op->dst.y;
443 src_x = r->src.x + op->src.offset[0];
444 src_y = r->src.y + op->src.offset[1];
446 gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
447 OUT_VERTEX(src_x + r->width);
448 OUT_VERTEX(src_y + r->height);
450 gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
452 OUT_VERTEX(src_y + r->height);
454 gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
/* Rectangle for a gradient source with an affine transform: each corner's
 * source coordinate is pushed through op->src.transform before being
 * emitted as the gradient texcoord (transformed results emitted via
 * elided OUT_VERTEX calls in the full file). */
460 gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
461 const struct sna_composite_op *op,
462 const struct sna_composite_rectangles *r)
464 PictTransform *transform = op->src.transform;
465 int16_t dst_x, dst_y;
466 int16_t src_x, src_y;
469 dst_x = r->dst.x + op->dst.x;
470 dst_y = r->dst.y + op->dst.y;
471 src_x = r->src.x + op->src.offset[0];
472 src_y = r->src.y + op->src.offset[1];
474 sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
477 gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
481 sna_get_transformed_coordinates(src_x, src_y + r->height,
484 gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
488 sna_get_transformed_coordinates(src_x, src_y,
491 gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
/* Fast path: untransformed texture source.  Writes 3 vertices x 4 floats
 * (x, y, s, t) straight into the vertex array; texcoords are the source
 * position scaled to [0,1] by op->src.scale. */
497 gen3_emit_composite_primitive_identity_source(struct sna *sna,
498 const struct sna_composite_op *op,
499 const struct sna_composite_rectangles *r)
505 v = sna->render.vertices + sna->render.vertex_used;
506 sna->render.vertex_used += 12;
508 v[8] = v[4] = r->dst.x + op->dst.x;
511 v[9] = r->dst.y + op->dst.y;
512 v[5] = v[1] = v[9] + h;
514 v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
515 v[2] = v[6] + w * op->src.scale[0];
517 v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
518 v[7] = v[3] = v[11] + h * op->src.scale[1];
/* As above but op->dst/op->src offsets are known to be zero, saving the
 * additions. */
522 gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
523 const struct sna_composite_op *op,
524 const struct sna_composite_rectangles *r)
530 v = sna->render.vertices + sna->render.vertex_used;
531 sna->render.vertex_used += 12;
533 v[8] = v[4] = r->dst.x;
537 v[5] = v[1] = v[9] + h;
539 v[10] = v[6] = r->src.x * op->src.scale[0];
540 v[11] = r->src.y * op->src.scale[1];
542 v[2] = v[6] + w * op->src.scale[0];
543 v[7] = v[3] = v[11] + h * op->src.scale[1];
/* Affine-transformed texture source: transform each corner with
 * op->src.transform, then scale into normalized texture space before
 * emitting (x, y, s, t) per vertex. */
547 gen3_emit_composite_primitive_affine_source(struct sna *sna,
548 const struct sna_composite_op *op,
549 const struct sna_composite_rectangles *r)
551 PictTransform *transform = op->src.transform;
552 int16_t dst_x = r->dst.x + op->dst.x;
553 int16_t dst_y = r->dst.y + op->dst.y;
554 int src_x = r->src.x + (int)op->src.offset[0];
555 int src_y = r->src.y + (int)op->src.offset[1];
558 _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
562 gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
563 OUT_VERTEX(sx * op->src.scale[0]);
564 OUT_VERTEX(sy * op->src.scale[1]);
566 _sna_get_transformed_coordinates(src_x, src_y + r->height,
569 gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
570 OUT_VERTEX(sx * op->src.scale[0]);
571 OUT_VERTEX(sy * op->src.scale[1]);
573 _sna_get_transformed_coordinates(src_x, src_y,
576 gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
577 OUT_VERTEX(sx * op->src.scale[0]);
578 OUT_VERTEX(sy * op->src.scale[1]);
/* Solid source modulated by an untransformed mask: 3 vertices x 4 floats
 * (x, y, mask_s, mask_t); the constant color comes from diffuse state,
 * so only the mask needs texcoords. */
582 gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
583 const struct sna_composite_op *op,
584 const struct sna_composite_rectangles *r)
590 v = sna->render.vertices + sna->render.vertex_used;
591 sna->render.vertex_used += 12;
593 v[8] = v[4] = r->dst.x + op->dst.x;
596 v[9] = r->dst.y + op->dst.y;
597 v[5] = v[1] = v[9] + h;
599 v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
600 v[2] = v[6] + w * op->mask.scale[0];
602 v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
603 v[7] = v[3] = v[11] + h * op->mask.scale[1];
/* As above with zero dst/mask offsets. */
607 gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna,
608 const struct sna_composite_op *op,
609 const struct sna_composite_rectangles *r)
615 v = sna->render.vertices + sna->render.vertex_used;
616 sna->render.vertex_used += 12;
618 v[8] = v[4] = r->dst.x;
622 v[5] = v[1] = v[9] + h;
624 v[10] = v[6] = r->mask.x * op->mask.scale[0];
625 v[11] = r->mask.y * op->mask.scale[1];
627 v[2] = v[6] + w * op->mask.scale[0];
628 v[7] = v[3] = v[11] + h * op->mask.scale[1];
/* Texture source plus texture mask, both untransformed: 3 vertices x
 * 6 floats (x, y, src_s, src_t, mask_s, mask_t). */
632 gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
633 const struct sna_composite_op *op,
634 const struct sna_composite_rectangles *r)
642 dst_x = r->dst.x + op->dst.x;
643 dst_y = r->dst.y + op->dst.y;
644 src_x = r->src.x + op->src.offset[0];
645 src_y = r->src.y + op->src.offset[1];
646 msk_x = r->mask.x + op->mask.offset[0];
647 msk_y = r->mask.y + op->mask.offset[1];
651 v = sna->render.vertices + sna->render.vertex_used;
652 sna->render.vertex_used += 18;
656 v[2] = (src_x + w) * op->src.scale[0];
657 v[3] = (src_y + h) * op->src.scale[1];
658 v[4] = (msk_x + w) * op->mask.scale[0];
659 v[5] = (msk_y + h) * op->mask.scale[1];
663 v[8] = src_x * op->src.scale[0];
665 v[10] = msk_x * op->mask.scale[0];
671 v[15] = src_y * op->src.scale[1];
673 v[17] = msk_y * op->mask.scale[1];
/* As above but the source is affine-transformed; the source texcoords
 * come from sna_get_transformed_coordinates and are then normalized in
 * place by op->src.scale, while the mask stays identity. */
677 gen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
678 const struct sna_composite_op *op,
679 const struct sna_composite_rectangles *r)
681 int16_t src_x, src_y;
687 dst_x = r->dst.x + op->dst.x;
688 dst_y = r->dst.y + op->dst.y;
689 src_x = r->src.x + op->src.offset[0];
690 src_y = r->src.y + op->src.offset[1];
691 msk_x = r->mask.x + op->mask.offset[0];
692 msk_y = r->mask.y + op->mask.offset[1];
696 v = sna->render.vertices + sna->render.vertex_used;
697 sna->render.vertex_used += 18;
701 sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
704 v[2] *= op->src.scale[0];
705 v[3] *= op->src.scale[1];
706 v[4] = (msk_x + w) * op->mask.scale[0];
707 v[5] = (msk_y + h) * op->mask.scale[1];
711 sna_get_transformed_coordinates(src_x, src_y + r->height,
714 v[8] *= op->src.scale[0];
715 v[9] *= op->src.scale[1];
716 v[10] = msk_x * op->mask.scale[0];
721 sna_get_transformed_coordinates(src_x, src_y,
724 v[14] *= op->src.scale[0];
725 v[15] *= op->src.scale[1];
727 v[17] = msk_y * op->mask.scale[1];
/* Generic (slow-path) texcoord emission for one channel at (x, y):
 * applies the channel offset and transform, emitting 2 floats for an
 * affine channel or (per the elided branch) homogeneous coords via the
 * 3d variant for a projective one. */
731 gen3_emit_composite_texcoord(struct sna *sna,
732 const struct sna_composite_channel *channel,
733 int16_t x, int16_t y)
735 float s = 0, t = 0, w = 1;
737 switch (channel->u.gen3.type) {
743 case SHADER_CONSTANT:
749 x += channel->offset[0];
750 y += channel->offset[1];
751 if (channel->is_affine) {
752 sna_get_transformed_coordinates(x, y,
755 OUT_VERTEX(s * channel->scale[0]);
756 OUT_VERTEX(t * channel->scale[1]);
758 sna_get_transformed_coordinates_3d(x, y,
761 OUT_VERTEX(s * channel->scale[0]);
762 OUT_VERTEX(t * channel->scale[1]);
/* One full vertex: destination position followed by src then mask
 * texcoords. */
771 gen3_emit_composite_vertex(struct sna *sna,
772 const struct sna_composite_op *op,
773 int16_t srcX, int16_t srcY,
774 int16_t maskX, int16_t maskY,
775 int16_t dstX, int16_t dstY)
777 gen3_emit_composite_dstcoord(sna, dstX, dstY);
778 gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY);
779 gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY);
/* Fully generic rectangle emitter (fallback when no fast path applies):
 * three vertices in RECTLIST order -- bottom-right, bottom-left,
 * top-left -- each built by gen3_emit_composite_vertex. */
783 gen3_emit_composite_primitive(struct sna *sna,
784 const struct sna_composite_op *op,
785 const struct sna_composite_rectangles *r)
787 gen3_emit_composite_vertex(sna, op,
789 r->src.y + r->height,
790 r->mask.x + r->width,
791 r->mask.y + r->height,
792 op->dst.x + r->dst.x + r->width,
793 op->dst.y + r->dst.y + r->height);
794 gen3_emit_composite_vertex(sna, op,
796 r->src.y + r->height,
798 r->mask.y + r->height,
799 op->dst.x + r->dst.x,
800 op->dst.y + r->dst.y + r->height);
801 gen3_emit_composite_vertex(sna, op,
806 op->dst.x + r->dst.x,
807 op->dst.y + r->dst.y);
/* Perspective divide in the fragment shader: out.xy = in.xy / in.w,
 * implemented as rcp(w) then multiply (elided multiply in the full
 * file). */
811 gen3_2d_perspective(struct sna *sna, int in, int out)
813 gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
815 gen3_fs_operand(in, X, Y, ZERO, ONE),
816 gen3_fs_operand_reg(out));
/* Compute the 1D linear-gradient parameter: project (x, y, 1) onto the
 * gradient vector stored in constant register c via a dp3.  Projective
 * channels first divide through w. */
820 gen3_linear_coord(struct sna *sna,
821 const struct sna_composite_channel *channel,
824 int c = channel->u.gen3.constants;
826 if (!channel->is_affine) {
827 gen3_2d_perspective(sna, in, FS_U0);
831 gen3_fs_mov(out, gen3_fs_operand_zero());
832 gen3_fs_dp3(out, MASK_X,
833 gen3_fs_operand(in, X, Y, ONE, ZERO),
834 gen3_fs_operand_reg(c));
/* Compute the radial-gradient parameter t in the fragment shader.
 * Two modes (see the inline derivations): RADIAL_ONE, where the circles
 * share a center and t reduces to |p - c|/dr - r1/dr, and the general
 * two-circle case solved via the quadratic formula using constants
 * preloaded in registers c and c+1. */
838 gen3_radial_coord(struct sna *sna,
839 const struct sna_composite_channel *channel,
842 int c = channel->u.gen3.constants;
844 if (!channel->is_affine) {
845 gen3_2d_perspective(sna, in, FS_U0);
849 switch (channel->u.gen3.mode) {
852 pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
853 r² = pdx*pdx + pdy*pdy
854 t = r²/sqrt(r²) - r1/dr;
856 gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
857 gen3_fs_operand(in, X, Y, ZERO, ZERO),
858 gen3_fs_operand(c, Z, Z, ZERO, ZERO),
859 gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
860 gen3_fs_dp2add(FS_U0, MASK_X,
861 gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
862 gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
863 gen3_fs_operand_zero());
/* sqrt(r²) computed as r² * rsq(r²). */
864 gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
866 gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
867 gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
868 gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
873 pdx = x - c1x, pdy = y - c1y;
875 B = -2*(pdx*dx + pdy*dy + r1*dr);
876 C = pdx² + pdy² - r1²;
878 t = (-B + sqrt (det)) / (2 * A)
881 /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
883 gen3_fs_operand(in, X, Y, ZERO, ZERO),
884 gen3_fs_operand(c, X, Y, Z, ZERO));
885 /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
886 gen3_fs_dp3(FS_U0, MASK_W,
887 gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
888 gen3_fs_operand(c+1, X, Y, Z, ZERO));
889 /* u1.x = pdx² + pdy² - r1²; [C] */
890 gen3_fs_dp3(FS_U1, MASK_X,
891 gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
892 gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
893 /* u1.x = C, u1.y = B, u1.z=-4*A; */
894 gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
895 gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
896 /* u1.x = B² - 4*A*C */
897 gen3_fs_dp2add(FS_U1, MASK_X,
898 gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
899 gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
900 gen3_fs_operand_zero());
901 /* out.x = -B + sqrt (B² - 4*A*C), */
902 gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
903 gen3_fs_mad(out, MASK_X,
904 gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
905 gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
906 gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
907 /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
909 gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
910 gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
/* Generate and emit the _3DSTATE_PIXEL_SHADER_PROGRAM for @op.
 *
 * The shader is keyed by a bitfield id built from the src/mask shader
 * types and their fixup flags; if it matches state->last_shader the
 * emission is skipped entirely.  Otherwise the opcode slot is reserved
 * (shader_offset), the fs_* helpers append instructions, and the final
 * length is patched back into the reserved dword at the end.
 * Structure: declarations, then either a direct-to-FS_OC path when there
 * is no mask, or src -> src_reg / mask -> mask_reg loads combined per
 * the (component-alpha aware) cases below. */
916 gen3_composite_emit_shader(struct sna *sna,
917 const struct sna_composite_op *op,
920 bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
921 const struct sna_composite_channel *src, *mask;
922 struct gen3_render_state *state = &sna->render_state.gen3;
923 uint32_t shader_offset, id;
924 int src_reg, mask_reg;
929 if (mask->u.gen3.type == SHADER_NONE)
/* An opaque source under CA with a src_alpha blend makes the mask
 * behave as a plain mask (elided body). */
932 if (mask && src->is_opaque &&
933 gen3_blend_op[blend].src_alpha &&
934 op->has_component_alpha) {
/* Build the shader cache key. */
939 id = (src->u.gen3.type |
940 src->is_affine << 4 |
941 src->alpha_fixup << 5 |
942 src->rb_reversed << 6);
944 id |= (mask->u.gen3.type << 8 |
945 mask->is_affine << 12 |
946 gen3_blend_op[blend].src_alpha << 13 |
947 op->has_component_alpha << 14 |
948 mask->alpha_fixup << 15 |
949 mask->rb_reversed << 16);
951 id |= dst_is_alpha << 24;
952 id |= op->rb_reversed << 25;
954 if (id == state->last_shader)
957 state->last_shader = id;
/* Reserve the opcode dword; its length field is patched at the end. */
959 shader_offset = sna->kgem.nbatch++;
961 switch (src->u.gen3.type) {
969 case SHADER_CONSTANT:
/* Trivial sources resolve to immediate movs into the output color. */
983 switch (src->u.gen3.type) {
985 gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
989 gen3_fs_mov(FS_OC, gen3_fs_operand_one());
991 gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
994 gen3_fs_mov(FS_OC, gen3_fs_operand_one());
997 if (src->alpha_fixup && dst_is_alpha) {
998 gen3_fs_mov(FS_OC, gen3_fs_operand_one());
1001 /* No mask, so load directly to output color */
1002 if (src->u.gen3.type != SHADER_CONSTANT) {
1003 if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
1008 switch (src->u.gen3.type) {
1010 gen3_linear_coord(sna, src, FS_T0, FS_R0);
1011 gen3_fs_texld(src_reg, FS_S0, FS_R0);
1015 gen3_radial_coord(sna, src, FS_T0, FS_R0);
1016 gen3_fs_texld(src_reg, FS_S0, FS_R0);
1019 case SHADER_TEXTURE:
/* texld for affine sampling, texldp for projective. */
1021 gen3_fs_texld(src_reg, FS_S0, FS_T0);
1023 gen3_fs_texldp(src_reg, FS_S0, FS_T0);
1031 case SHADER_CONSTANT:
/* Apply alpha/channel fixups while moving into FS_OC. */
1035 if (src_reg != FS_OC) {
1036 if (src->alpha_fixup)
1038 src->rb_reversed ^ op->rb_reversed ?
1039 gen3_fs_operand(src_reg, Z, Y, X, ONE) :
1040 gen3_fs_operand(src_reg, X, Y, Z, ONE));
1041 else if (dst_is_alpha)
1042 gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
1043 else if (src->rb_reversed ^ op->rb_reversed)
1044 gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
1046 gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
1047 } else if (src->alpha_fixup)
1048 gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
/* Masked path: build into out_reg (FS_OC, or a temp when the dst
 * needs an rb swap at the end). */
1050 int out_reg = FS_OC;
1051 if (op->rb_reversed)
1054 switch (mask->u.gen3.type) {
1055 case SHADER_CONSTANT:
1059 case SHADER_TEXTURE:
1062 gen3_fs_dcl(FS_S0 + t);
1064 case SHADER_OPACITY:
1065 gen3_fs_dcl(FS_T0 + t);
/* Load the source channel into FS_R0/src_reg. */
1076 switch (src->u.gen3.type) {
1078 gen3_linear_coord(sna, src, FS_T0, FS_R0);
1079 gen3_fs_texld(FS_R0, FS_S0, FS_R0);
1085 gen3_radial_coord(sna, src, FS_T0, FS_R0);
1086 gen3_fs_texld(FS_R0, FS_S0, FS_R0);
1091 case SHADER_TEXTURE:
1093 gen3_fs_texld(FS_R0, FS_S0, FS_T0);
1095 gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
1100 case SHADER_CONSTANT:
1107 if (src->alpha_fixup)
1108 gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
1109 if (src->rb_reversed)
1110 gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));
/* Load the mask channel into FS_R1/mask_reg. */
1112 switch (mask->u.gen3.type) {
1114 gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
1115 gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
1120 gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
1121 gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
1125 case SHADER_TEXTURE:
1126 if (mask->is_affine)
1127 gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
1129 gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
/* SHADER_OPACITY: the mask is a per-vertex scalar in T-coord X. */
1133 case SHADER_OPACITY:
1134 switch (src->u.gen3.type) {
1137 if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
1138 gen3_fs_mov(out_reg,
1139 gen3_fs_operand(FS_T0 + t, X, X, X, X));
1141 gen3_fs_mov(out_reg,
1142 gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
1147 gen3_fs_mul(out_reg,
1148 gen3_fs_operand(src_reg, W, W, W, W),
1149 gen3_fs_operand(FS_T0 + t, X, X, X, X));
1151 gen3_fs_mul(out_reg,
1152 gen3_fs_operand(src_reg, X, Y, Z, W),
1153 gen3_fs_operand(FS_T0 + t, X, X, X, X));
1158 case SHADER_CONSTANT:
1165 if (mask->alpha_fixup)
1166 gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
1167 if (mask->rb_reversed)
1168 gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
/* Combine src and mask into out_reg. */
1171 switch (src->u.gen3.type) {
1174 gen3_fs_mov(out_reg,
1175 gen3_fs_operand(mask_reg, W, W, W, W));
1178 gen3_fs_mul(out_reg,
1179 gen3_fs_operand(src_reg, W, W, W, W),
1180 gen3_fs_operand(mask_reg, W, W, W, W));
1184 /* If component alpha is active in the mask and the blend
1185 * operation uses the source alpha, then we know we don't
1186 * need the source value (otherwise we would have hit a
1187 * fallback earlier), so we provide the source alpha (src.A *
1188 * mask.X) as output color.
1189 * Conversely, if CA is set and we don't need the source alpha,
1190 * then we produce the source value (src.X * mask.X) and the
1191 * source alpha is unused. Otherwise, we provide the non-CA
1192 * source value (src.X * mask.A).
1194 if (op->has_component_alpha) {
1195 switch (src->u.gen3.type) {
1197 if (gen3_blend_op[blend].src_alpha)
1198 gen3_fs_mov(out_reg,
1199 gen3_fs_operand_reg(mask_reg));
1201 gen3_fs_mov(out_reg,
1202 gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
1205 gen3_fs_mov(out_reg,
1206 gen3_fs_operand_reg(mask_reg));
1209 if (gen3_blend_op[blend].src_alpha)
1210 gen3_fs_mul(out_reg,
1211 gen3_fs_operand(src_reg, W, W, W, W),
1212 gen3_fs_operand_reg(mask_reg));
1214 gen3_fs_mul(out_reg,
1215 gen3_fs_operand_reg(src_reg),
1216 gen3_fs_operand_reg(mask_reg));
1220 switch (src->u.gen3.type) {
1222 gen3_fs_mov(out_reg,
1223 gen3_fs_operand(mask_reg, W, W, W, W));
1226 gen3_fs_mov(out_reg,
1227 gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
1230 gen3_fs_mul(out_reg,
1231 gen3_fs_operand_reg(src_reg),
1232 gen3_fs_operand(mask_reg, W, W, W, W));
/* Swap red/blue on output if the target format requires it. */
1238 if (op->rb_reversed)
1239 gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
/* Patch the reserved opcode dword with the final program length. */
1243 length = sna->kgem.nbatch - shader_offset;
1244 sna->kgem.batch[shader_offset] =
1245 _3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
/* Translate I915_TILING_* into MS3 map-state tiling bits; as in
 * gen3_buf_tiling the Y case falls through to also set the tiled-surface
 * bit. */
1248 static uint32_t gen3_ms_tiling(uint32_t tiling)
1252 case I915_TILING_Y: v |= MS3_TILE_WALK;
1253 case I915_TILING_X: v |= MS3_TILED_SURFACE;
1254 case I915_TILING_NONE: break;
/* Emit the once-per-batch invariant 3D state: independent-alpha blend
 * off, coordinate-set bindings, default S3-S6 immediate state (blending
 * and depth disabled, color writes on), scissor/depth-subrect disabled,
 * and cleared load-indirect and stipple state. */
1259 static void gen3_emit_invariant(struct sna *sna)
1261 /* Disable independent alpha blend */
1262 OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
1263 IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
1264 IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
1265 IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
1267 OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
1277 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
1278 OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
1279 OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
1283 OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
1284 OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */
1286 OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
1287 OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
1289 OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
1290 OUT_BATCH(0x00000000);
1292 OUT_BATCH(_3DSTATE_STIPPLE);
1293 OUT_BATCH(0x00000000);
1295 sna->render_state.gen3.need_invariant = false;
1298 #define MAX_OBJECTS 3 /* worst case: dst + src + mask */
/* Ensure the batch can take a full composite state emission: switch to
 * RENDER mode, then flush if any of batch space (200 dwords), reloc
 * slots, or exec slots for MAX_OBJECTS could run out, and re-emit the
 * invariant state after a fresh batch. */
1301 gen3_get_batch(struct sna *sna)
1303 kgem_set_mode(&sna->kgem, KGEM_RENDER);
1305 if (!kgem_check_batch(&sna->kgem, 200)) {
1306 DBG(("%s: flushing batch: size %d > %d\n",
1308 sna->kgem.surface-sna->kgem.nbatch));
1309 kgem_submit(&sna->kgem);
1310 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
1313 if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
1314 DBG(("%s: flushing batch: reloc %d >= %d\n",
1317 (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
1318 kgem_submit(&sna->kgem);
1319 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
1322 if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
1323 DBG(("%s: flushing batch: exec %d >= %d\n",
1326 (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
1327 kgem_submit(&sna->kgem);
1328 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
1331 if (sna->render_state.gen3.need_invariant)
1332 gen3_emit_invariant(sna);
/* Emit render-target state (BUF_INFO + DST_BUF_VARS + DRAW_RECT) for
 * @bo.  BUF_INFO implies a pipeline flush, so it is skipped when the
 * target bo (tracked via unique_id in state->current_dst) is unchanged;
 * the draw-rect limit is likewise cached in last_drawrect_limit. */
1336 static void gen3_emit_target(struct sna *sna,
1342 struct gen3_render_state *state = &sna->render_state.gen3;
1344 assert(!too_large(width, height));
1346 /* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
1347 assert(bo->unique_id != 0);
1348 if (bo->unique_id != state->current_dst) {
1351 DBG(("%s: setting new target id=%d, handle=%d\n",
1352 __FUNCTION__, bo->unique_id, bo->handle));
1354 OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
1355 OUT_BATCH(BUF_3D_ID_COLOR_BACK |
1356 gen3_buf_tiling(bo->tiling) |
1358 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
1360 I915_GEM_DOMAIN_RENDER << 16 |
1361 I915_GEM_DOMAIN_RENDER,
1364 OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
1365 OUT_BATCH(gen3_get_dst_format(format));
1367 v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
1368 if (v != state->last_drawrect_limit) {
1369 OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
1370 OUT_BATCH(0); /* XXX dither origin? */
1374 state->last_drawrect_limit = v;
1377 state->current_dst = bo->unique_id;
/* The target will be written by the GPU. */
1379 kgem_bo_mark_dirty(bo);
/* Emit all hardware state needed before rendering @op's rectangles:
 * flush if src/mask alias the (dirty) destination, set the target,
 * gather per-channel map/sampler dwords, then emit blend (S2/S6),
 * shader constants, MAP_STATE, SAMPLER_STATE and the pixel shader --
 * each guarded by a comparison against the cached copy in
 * sna->render_state.gen3 so redundant state is rewound/skipped. */
1382 static void gen3_emit_composite_state(struct sna *sna,
1383 const struct sna_composite_op *op)
1385 struct gen3_render_state *state = &sna->render_state.gen3;
1387 uint32_t sampler[4];
1388 struct kgem_bo *bo[2];
1389 unsigned int tex_count, n;
1392 gen3_get_batch(sna);
/* Reading our own render target as a texture requires a flush; a
 * MODES_5 cache flush suffices when they don't alias. */
1394 if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
1395 if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
1396 OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
1398 OUT_BATCH(_3DSTATE_MODES_5_CMD |
1399 PIPELINE_FLUSH_RENDER_CACHE |
1400 PIPELINE_FLUSH_TEXTURE_CACHE);
1401 kgem_clear_dirty(&sna->kgem);
1404 gen3_emit_target(sna,
/* Source channel: constants go via the default-diffuse color, textures
 * contribute a texcoord slot plus map/sampler entries. */
1412 switch (op->src.u.gen3.type) {
1413 case SHADER_OPACITY:
1420 case SHADER_CONSTANT:
1421 if (op->src.u.gen3.mode != state->last_diffuse) {
1422 OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
1423 OUT_BATCH(op->src.u.gen3.mode);
1424 state->last_diffuse = op->src.u.gen3.mode;
1429 case SHADER_TEXTURE:
1430 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
1431 ss2 |= S2_TEXCOORD_FMT(tex_count,
1432 op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
1433 map[tex_count * 2 + 0] =
1434 op->src.card_format |
1435 gen3_ms_tiling(op->src.bo->tiling) |
1436 (op->src.height - 1) << MS3_HEIGHT_SHIFT |
1437 (op->src.width - 1) << MS3_WIDTH_SHIFT;
1438 map[tex_count * 2 + 1] =
1439 (op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
1441 sampler[tex_count * 2 + 0] = op->src.filter;
1442 sampler[tex_count * 2 + 1] =
1444 tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
1445 bo[tex_count] = op->src.bo;
/* Mask channel: same scheme, constants via default-specular; an
 * opacity mask only needs a 1D texcoord slot. */
1449 switch (op->mask.u.gen3.type) {
1455 case SHADER_CONSTANT:
1456 if (op->mask.u.gen3.mode != state->last_specular) {
1457 OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
1458 OUT_BATCH(op->mask.u.gen3.mode);
1459 state->last_specular = op->mask.u.gen3.mode;
1464 case SHADER_TEXTURE:
1465 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
1466 ss2 |= S2_TEXCOORD_FMT(tex_count,
1467 op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
1468 map[tex_count * 2 + 0] =
1469 op->mask.card_format |
1470 gen3_ms_tiling(op->mask.bo->tiling) |
1471 (op->mask.height - 1) << MS3_HEIGHT_SHIFT |
1472 (op->mask.width - 1) << MS3_WIDTH_SHIFT;
1473 map[tex_count * 2 + 1] =
1474 (op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
1476 sampler[tex_count * 2 + 0] = op->mask.filter;
1477 sampler[tex_count * 2 + 1] =
1479 tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
1480 bo[tex_count] = op->mask.bo;
1483 case SHADER_OPACITY:
1484 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
1485 ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
/* S2 (texcoord layout) + S6 (blend): emit, then rewind the batch if
 * identical to the previously emitted blend block. */
1490 uint32_t blend_offset = sna->kgem.nbatch;
1492 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
1494 OUT_BATCH(gen3_get_blend_cntl(op->op,
1495 op->has_component_alpha,
1498 if (memcmp(sna->kgem.batch + state->last_blend + 1,
1499 sna->kgem.batch + blend_offset + 1,
1501 sna->kgem.nbatch = blend_offset;
1503 state->last_blend = blend_offset;
/* Pixel-shader constants, skipped when identical to the last upload. */
1506 if (op->u.gen3.num_constants) {
1507 int count = op->u.gen3.num_constants;
1508 if (state->last_constants) {
1509 int last = sna->kgem.batch[state->last_constants+1];
1510 if (last == (1 << (count >> 2)) - 1 &&
1511 memcmp(&sna->kgem.batch[state->last_constants+2],
1512 op->u.gen3.constants,
1513 count * sizeof(uint32_t)) == 0)
1517 state->last_constants = sna->kgem.nbatch;
1518 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
1519 OUT_BATCH((1 << (count >> 2)) - 1);
1521 memcpy(sna->kgem.batch + sna->kgem.nbatch,
1522 op->u.gen3.constants,
1523 count * sizeof(uint32_t));
1524 sna->kgem.nbatch += count;
/* MAP_STATE: only re-emit if any map dword, bo handle or delta changed
 * since the cached copy. */
1528 if (tex_count != 0) {
1532 if (tex_count == state->tex_count) {
1533 for (; n < tex_count; n++) {
1534 if (map[2*n+0] != state->tex_map[2*n+0] ||
1535 map[2*n+1] != state->tex_map[2*n+1] ||
1536 state->tex_handle[n] != bo[n]->handle ||
1537 state->tex_delta[n] != bo[n]->delta)
1541 if (n < tex_count) {
1542 OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
1543 OUT_BATCH((1 << tex_count) - 1);
1544 for (n = 0; n < tex_count; n++) {
1545 OUT_BATCH(kgem_add_reloc(&sna->kgem,
1548 I915_GEM_DOMAIN_SAMPLER<< 16,
1550 OUT_BATCH(map[2*n + 0]);
1551 OUT_BATCH(map[2*n + 1]);
1553 state->tex_map[2*n+0] = map[2*n+0];
1554 state->tex_map[2*n+1] = map[2*n+1];
1555 state->tex_handle[n] = bo[n]->handle;
1556 state->tex_delta[n] = bo[n]->delta;
1558 state->tex_count = n;
/* SAMPLER_STATE: emitted speculatively, then rewound if identical to
 * the last sampler block. */
1561 rewind = sna->kgem.nbatch;
1562 OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
1563 OUT_BATCH((1 << tex_count) - 1);
1564 for (n = 0; n < tex_count; n++) {
1565 OUT_BATCH(sampler[2*n + 0]);
1566 OUT_BATCH(sampler[2*n + 1]);
1569 if (state->last_sampler &&
1570 memcmp(&sna->kgem.batch[state->last_sampler+1],
1571 &sna->kgem.batch[rewind + 1],
1572 (3*tex_count + 1)*sizeof(uint32_t)) == 0)
1573 sna->kgem.nbatch = rewind;
1575 state->last_sampler = rewind;
1578 gen3_composite_emit_shader(sna, op, op->op);
/* Second pass for component-alpha rendering: replay the rectangles emitted
 * since vertex_start with a PictOpAdd blend and shader to accumulate the
 * colour channels after the initial OutReverse pass.
 * NOTE(review): extraction gaps — some body lines are missing here. */
1581 static void gen3_magic_ca_pass(struct sna *sna,
1582 const struct sna_composite_op *op)
1584 if (!op->need_magic_ca_pass)
1587 DBG(("%s(%d)\n", __FUNCTION__,
1588 sna->render.vertex_index - sna->render.vertex_start));
1590 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
1591 OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
1592 gen3_composite_emit_shader(sna, op, PictOpAdd);
/* Re-issue the same vertex range as an indirect-sequential rectlist. */
1594 OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
1595 (sna->render.vertex_index - sna->render.vertex_start));
1596 OUT_BATCH(sna->render.vertex_start);
/* Blend state was clobbered above; force it to be re-emitted next time. */
1598 sna->render_state.gen3.last_blend = 0;
/* Patch the pending primitive command (reserved at vertex_offset) with the
 * final vertex count and start index, then clear the pending marker. */
1601 static void gen3_vertex_flush(struct sna *sna)
1603 assert(sna->render_state.gen3.vertex_offset);
1605 DBG(("%s[%x] = %d\n", __FUNCTION__,
1606 4*sna->render_state.gen3.vertex_offset,
1607 sna->render.vertex_index - sna->render.vertex_start));
1609 sna->kgem.batch[sna->render_state.gen3.vertex_offset] =
1610 PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
1611 (sna->render.vertex_index - sna->render.vertex_start);
1612 sna->kgem.batch[sna->render_state.gen3.vertex_offset + 1] =
1613 sna->render.vertex_start;
1615 sna->render_state.gen3.vertex_offset = 0;
/* Retire the current vertex buffer: flush any pending primitive, bind the
 * old vbo into the batch via its relocation, then allocate and map a fresh
 * 256KiB GTT-mapped vbo, preserving any already-written vertex data.
 * Returns the number of floats still available (0 on allocation failure —
 * the failure return path is in lines lost to extraction gaps). */
1618 static int gen3_vertex_finish(struct sna *sna)
1622 DBG(("%s: used=%d/%d, vbo active? %d\n",
1623 __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
1624 sna->render.vbo ? sna->render.vbo->handle : 0));
1625 assert(sna->render.vertex_used);
1626 assert(sna->render.vertex_used <= sna->render.vertex_size);
1628 bo = sna->render.vbo;
1630 if (sna->render_state.gen3.vertex_offset)
1631 gen3_vertex_flush(sna);
1633 DBG(("%s: reloc = %d\n", __FUNCTION__,
1634 sna->render.vertex_reloc[0]));
/* Fix up the deferred S0 slot with the real buffer address. */
1636 sna->kgem.batch[sna->render.vertex_reloc[0]] =
1637 kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
1638 bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
1640 sna->render.vertex_reloc[0] = 0;
1641 sna->render.vertex_used = 0;
1642 sna->render.vertex_index = 0;
1644 kgem_bo_destroy(&sna->kgem, bo);
1647 sna->render.vertices = NULL;
1648 sna->render.vbo = kgem_create_linear(&sna->kgem,
1649 256*1024, CREATE_GTT_MAP);
1650 if (sna->render.vbo)
1651 sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
1652 if (sna->render.vertices == NULL) {
1653 if (sna->render.vbo)
1654 kgem_bo_destroy(&sna->kgem, sna->render.vbo);
1655 sna->render.vbo = NULL;
1658 assert(sna->render.vbo->snoop == false);
/* Carry over vertices written before the switch to the new buffer. */
1660 if (sna->render.vertex_used) {
1661 memcpy(sna->render.vertices,
1662 sna->render.vertex_data,
1663 sizeof(float)*sna->render.vertex_used);
1665 sna->render.vertex_size = 64 * 1024 - 1;
1666 return sna->render.vertex_size - sna->render.vertex_used;
/* Finalise vertex data at batch submission: decide where the vertices live
 * (existing vbo, inline in the batch, or a freshly-uploaded bo), fix up the
 * pending relocation, and reset vbo bookkeeping as needed. */
1669 static void gen3_vertex_close(struct sna *sna)
1671 struct kgem_bo *bo, *free_bo = NULL;
1672 unsigned int delta = 0;
1674 assert(sna->render_state.gen3.vertex_offset == 0);
1676 DBG(("%s: used=%d/%d, vbo active? %d\n",
1677 __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
1678 sna->render.vbo ? sna->render.vbo->handle : 0));
1680 if (sna->render.vertex_used == 0)
1683 bo = sna->render.vbo;
/* Nearly-full vbo: release it and fall back to the static vertex array. */
1685 if (sna->render.vertex_size - sna->render.vertex_used < 64) {
1686 DBG(("%s: discarding full vbo\n", __FUNCTION__));
1687 sna->render.vbo = NULL;
1688 sna->render.vertices = sna->render.vertex_data;
1689 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1691 } else if (IS_CPU_MAP(bo->map)) {
1692 DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
1693 sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
1694 if (sna->render.vertices == NULL) {
1695 DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
1696 sna->render.vbo = NULL;
1697 sna->render.vertices = sna->render.vertex_data;
1698 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
/* No vbo: either append the vertices inline after the batch commands
 * (delta is a byte offset into the batch bo) or upload to a new bo. */
1703 if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
1704 DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
1705 sna->render.vertex_used, sna->kgem.nbatch));
1706 memcpy(sna->kgem.batch + sna->kgem.nbatch,
1707 sna->render.vertex_data,
1708 sna->render.vertex_used * 4);
1709 delta = sna->kgem.nbatch * 4;
1711 sna->kgem.nbatch += sna->render.vertex_used;
1713 DBG(("%s: new vbo: %d\n", __FUNCTION__,
1714 sna->render.vertex_used));
1715 bo = kgem_create_linear(&sna->kgem,
1716 4*sna->render.vertex_used, 0);
1718 assert(bo->snoop == false);
1719 kgem_bo_write(&sna->kgem, bo,
1720 sna->render.vertex_data,
1721 4*sna->render.vertex_used);
1727 DBG(("%s: reloc = %d\n", __FUNCTION__,
1728 sna->render.vertex_reloc[0]));
1730 if (sna->render.vertex_reloc[0]) {
1731 sna->kgem.batch[sna->render.vertex_reloc[0]] =
1732 kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
1733 bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
1734 sna->render.vertex_reloc[0] = 0;
1737 if (sna->render.vbo == NULL) {
1738 DBG(("%s: resetting vbo\n", __FUNCTION__));
1739 sna->render.vertex_used = 0;
1740 sna->render.vertex_index = 0;
1741 assert(sna->render.vertices == sna->render.vertex_data);
1742 assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
1746 kgem_bo_destroy(&sna->kgem, free_bo);
/* Start a new rectangle primitive: emit any needed S0 (vertex buffer) /
 * S1 (vertex format) immediate state and reserve a batch dword for the
 * primitive command, which gen3_vertex_flush() patches later.
 * Returns false if the batch lacks space (failure path lost to gaps). */
1749 static bool gen3_rectangle_begin(struct sna *sna,
1750 const struct sna_composite_op *op)
1752 struct gen3_render_state *state = &sna->render_state.gen3;
1753 int ndwords, i1_cmd = 0, i1_len = 0;
1756 if (op->need_magic_ca_pass)
1758 if (sna->render.vertex_reloc[0] == 0)
1759 i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
1760 if (state->floats_per_vertex != op->floats_per_vertex)
1761 i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
1763 if (!kgem_check_batch(&sna->kgem, ndwords+1))
1767 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
/* S0 payload deferred: record the slot, fill in the reloc at finish. */
1768 if (sna->render.vertex_reloc[0] == 0)
1769 sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
1770 if (state->floats_per_vertex != op->floats_per_vertex) {
1771 state->floats_per_vertex = op->floats_per_vertex;
1772 OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
1773 state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
/* Reuse the previous primitive slot if nothing was emitted in between. */
1777 if (sna->kgem.nbatch == 2 + state->last_vertex_offset) {
1778 state->vertex_offset = state->last_vertex_offset;
1780 state->vertex_offset = sna->kgem.nbatch;
1781 OUT_BATCH(MI_NOOP); /* to be filled later */
1783 sna->render.vertex_start = sna->render.vertex_index;
1784 state->last_vertex_offset = state->vertex_offset;
/* Slow path when the vertex buffer is exhausted: verify there is batch and
 * relocation space (105 dwords covers a full CA-pass state re-emission),
 * then recycle the vertex buffer. Returns remaining float capacity. */
1790 static int gen3_get_rectangles__flush(struct sna *sna,
1791 const struct sna_composite_op *op)
1793 if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
1795 if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
1798 if (op->need_magic_ca_pass && sna->render.vbo)
1801 return gen3_vertex_finish(sna);
/* Reserve vertex space for up to `want` rectangles, flushing the vbo and/or
 * submitting the batch when out of room; returns how many rectangles were
 * actually granted. The trailing lines are the flush-batch retry path.
 * NOTE(review): extraction gaps — labels/returns are missing here. */
1804 inline static int gen3_get_rectangles(struct sna *sna,
1805 const struct sna_composite_op *op,
1810 DBG(("%s: want=%d, rem=%d\n",
1811 __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));
1813 assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
1816 rem = vertex_space(sna);
1817 if (unlikely(op->floats_per_rect > rem)) {
1818 DBG(("flushing vbo for %s: %d < %d\n",
1819 __FUNCTION__, rem, op->floats_per_rect));
1820 rem = gen3_get_rectangles__flush(sna, op);
1821 if (unlikely(rem == 0))
1825 if (unlikely(sna->render_state.gen3.vertex_offset == 0 &&
1826 !gen3_rectangle_begin(sna, op)))
/* Grant fewer rectangles than requested if space is short. */
1829 if (want > 1 && want * op->floats_per_rect > rem)
1830 want = rem / op->floats_per_rect;
1831 sna->render.vertex_index += 3*want;
1834 assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
/* Retry path: submit the batch (completing any CA pass) and re-emit state. */
1838 DBG(("%s: flushing batch\n", __FUNCTION__));
1839 if (sna->render_state.gen3.vertex_offset) {
1840 gen3_vertex_flush(sna);
1841 gen3_magic_ca_pass(sna, op);
1843 _kgem_submit(&sna->kgem);
1844 gen3_emit_composite_state(sna, op);
/* Emit a single composite rectangle: reserve vertex space then delegate the
 * per-vertex emission to the operation's prim_emit hook. */
1848 fastcall static void
1849 gen3_render_composite_blt(struct sna *sna,
1850 const struct sna_composite_op *op,
1851 const struct sna_composite_rectangles *r)
1853 DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
1854 r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
1855 r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
1856 r->dst.x, r->dst.y, op->dst.x, op->dst.y,
1857 r->width, r->height));
1859 gen3_get_rectangles(sna, op, 1);
1861 op->prim_emit(sna, op, r);
/* Emit one composite rectangle for a BoxRec; src and mask coordinates track
 * the destination (channel offsets are applied by prim_emit). */
1864 fastcall static void
1865 gen3_render_composite_box(struct sna *sna,
1866 const struct sna_composite_op *op,
1869 struct sna_composite_rectangles r;
1871 DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
1873 op->src.offset[0], op->src.offset[1],
1874 op->mask.offset[0], op->mask.offset[1],
1875 op->dst.x, op->dst.y));
1877 gen3_get_rectangles(sna, op, 1);
1881 r.width = box->x2 - box->x1;
1882 r.height = box->y2 - box->y1;
1883 r.src = r.mask = r.dst;
1885 op->prim_emit(sna, op, &r);
/* Emit a batch of composite rectangles, looping as gen3_get_rectangles may
 * grant fewer boxes than requested per batch/vbo refill. */
1889 gen3_render_composite_boxes(struct sna *sna,
1890 const struct sna_composite_op *op,
1891 const BoxRec *box, int nbox)
1893 DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
1895 op->src.offset[0], op->src.offset[1],
1896 op->mask.offset[0], op->mask.offset[1],
1897 op->dst.x, op->dst.y));
1902 nbox_this_time = gen3_get_rectangles(sna, op, nbox);
1903 nbox -= nbox_this_time;
1906 struct sna_composite_rectangles r;
1908 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1911 box->y2 - box->y1));
1913 r.dst.x = box->x1; r.dst.y = box->y1;
1914 r.width = box->x2 - box->x1;
1915 r.height = box->y2 - box->y1;
1916 r.src = r.mask = r.dst;
1918 op->prim_emit(sna, op, &r);
1920 } while (--nbox_this_time);
/* Finish a composite operation: flush pending vertices (running the
 * component-alpha second pass if needed), drop channel bo references and
 * complete any destination redirection. */
1925 gen3_render_composite_done(struct sna *sna,
1926 const struct sna_composite_op *op)
1928 DBG(("%s()\n", __FUNCTION__));
1930 if (sna->render_state.gen3.vertex_offset) {
1931 gen3_vertex_flush(sna);
1932 gen3_magic_ca_pass(sna, op);
1936 kgem_bo_destroy(&sna->kgem, op->mask.bo);
1938 kgem_bo_destroy(&sna->kgem, op->src.bo);
1940 sna_render_composite_redirect_done(sna, op);
/* Drop the active vertex bo and revert to the embedded vertex_data array. */
1944 discard_vbo(struct sna *sna)
1946 kgem_bo_destroy(&sna->kgem, sna->render.vbo);
1947 sna->render.vbo = NULL;
1948 sna->render.vertices = sna->render.vertex_data;
1949 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1950 sna->render.vertex_used = 0;
1951 sna->render.vertex_index = 0;
/* Reset all cached gen3 hardware state so the next operation re-emits the
 * full pipeline setup; sentinel values force the first comparison to miss.
 * Also discards a vbo whose next access would stall on the GPU. */
1955 gen3_render_reset(struct sna *sna)
1957 struct gen3_render_state *state = &sna->render_state.gen3;
1959 state->need_invariant = true;
1960 state->current_dst = 0;
1961 state->tex_count = 0;
1962 state->last_drawrect_limit = ~0U;
1963 state->last_target = 0;
1964 state->last_blend = 0;
1965 state->last_constants = 0;
1966 state->last_sampler = 0;
1967 state->last_shader = 0x7fffffff;
1968 state->last_diffuse = 0xcc00ffee;
1969 state->last_specular = 0xcc00ffee;
1971 state->floats_per_vertex = 0;
1972 state->last_floats_per_vertex = 0;
1973 state->last_vertex_offset = 0;
1974 state->vertex_offset = 0;
1976 if (sna->render.vbo != NULL &&
1977 !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
1978 DBG(("%s: discarding vbo as next access will stall: %d\n",
1979 __FUNCTION__, sna->render.vbo->presumed_offset));
/* kgem retire callback: once the vbo is idle and unreferenced by the batch,
 * rewind the write pointers so its space can be reused. */
1985 gen3_render_retire(struct kgem *kgem)
1989 sna = container_of(kgem, struct sna, kgem);
1990 if (sna->render.vertex_reloc[0] == 0 &&
1991 sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
1992 DBG(("%s: resetting idle vbo\n", __FUNCTION__));
1993 sna->render.vertex_used = 0;
1994 sna->render.vertex_index = 0;
/* kgem expire callback: free an unused vertex bo to return its memory. */
1999 gen3_render_expire(struct kgem *kgem)
2003 sna = container_of(kgem, struct sna, kgem);
2004 if (sna->render.vbo && !sna->render.vertex_used) {
2005 DBG(("%s: discarding vbo\n", __FUNCTION__));
/* Look up the hardware texture format for a Render pict format; fills in
 * card_format and rb_reversed on a match (returns false if unsupported —
 * the miss return is in lines lost to extraction gaps). */
2010 static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
2015 for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
2016 if (gen3_tex_formats[i].fmt == format) {
2017 channel->card_format = gen3_tex_formats[i].card_fmt;
2018 channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
/* Return true when the sampled region is wholly inside the source drawable,
 * i.e. no out-of-bounds texels would be fetched. Repeating pictures always
 * qualify; transformed pictures check their mapped sample bounds. */
2025 static bool source_is_covered(PicturePtr picture,
2027 int width, int height)
2031 if (picture->repeat && picture->repeatType != RepeatNone)
2034 if (picture->pDrawable == NULL)
2037 if (picture->transform) {
2038 pixman_box16_t sample;
2042 sample.x2 = x + width;
2043 sample.y2 = y + height;
2045 pixman_transform_bounds(picture->transform, &sample);
2059 x1 >= 0 && y1 >= 0 &&
2060 x2 <= picture->pDrawable->width &&
2061 y2 <= picture->pDrawable->height;
/* Try to treat an alpha-less ("x") picture format as its alpha-carrying
 * hardware twin with alpha forced to one (alpha_fixup). Only safe when the
 * sample never reads outside the drawable. */
2064 static bool gen3_composite_channel_set_xformat(PicturePtr picture,
2065 struct sna_composite_channel *channel,
2067 int width, int height)
2071 if (PICT_FORMAT_A(picture->format) != 0)
2074 if (width == 0 || height == 0)
2077 if (!source_is_covered(picture, x, y, width, height))
2080 for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
2081 if (gen3_tex_formats[i].xfmt == picture->format) {
2082 channel->card_format = gen3_tex_formats[i].card_fmt;
2083 channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
2084 channel->alpha_fixup = true;
/* Configure a channel as a solid colour (ARGB32). Special-cases the
 * transparent/black/white constants to cheaper dedicated shaders. */
2093 gen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
2095 channel->u.gen3.mode = color;
2096 channel->u.gen3.type = SHADER_CONSTANT;
2098 channel->u.gen3.type = SHADER_ZERO;
2099 else if (color == 0xff000000)
2100 channel->u.gen3.type = SHADER_BLACK;
2101 else if (color == 0xffffffff)
2102 channel->u.gen3.type = SHADER_WHITE;
2105 channel->is_opaque = (color >> 24) == 0xff;
2106 channel->is_affine = 1;
2107 channel->alpha_fixup = 0;
2108 channel->rb_reversed = 0;
2110 DBG(("%s: color=%08x, is_opaque=%d, type=%d\n",
2111 __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type));
2113 /* for consistency */
2114 channel->repeat = RepeatNormal;
2115 channel->filter = PictFilterNearest;
2116 channel->pict_format = PICT_a8r8g8b8;
2117 channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
/* Translate the generic Render repeat/filter/format values on a channel
 * into their gen3 hardware encodings. */
2122 static void gen3_composite_channel_convert(struct sna_composite_channel *channel)
2124 if (channel->u.gen3.type == SHADER_TEXTURE)
2125 channel->repeat = gen3_texture_repeat(channel->repeat);
2127 channel->repeat = gen3_gradient_repeat(channel->repeat);
2129 channel->filter = gen3_filter(channel->filter);
2130 if (channel->card_format == 0)
2131 gen3_composite_channel_set_format(channel, channel->pict_format);
/* Common gradient-channel setup: render the gradient ramp to a 1D texture
 * bo, then set up repeat/filter/transform and a 1-texel-high mapping.
 * (ox, oy) is the destination offset of the channel. */
2134 static bool gen3_gradient_setup(struct sna *sna,
2136 struct sna_composite_channel *channel,
2137 int16_t ox, int16_t oy)
2141 if (picture->repeat == 0) {
2142 channel->repeat = RepeatNone;
2143 } else switch (picture->repeatType) {
2148 channel->repeat = picture->repeatType;
2155 sna_render_get_gradient(sna,
2156 (PictGradient *)picture->pSourcePict);
2157 if (channel->bo == NULL)
2160 channel->pict_format = PICT_a8r8g8b8;
2161 channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
2162 channel->filter = PictFilterNearest;
2163 channel->is_affine = sna_transform_is_affine(picture->transform);
/* An integer-translation transform can be folded away entirely. */
2164 if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
2165 DBG(("%s: integer translation (%d, %d), removing\n",
2166 __FUNCTION__, dx, dy));
2169 channel->transform = NULL;
2171 channel->transform = picture->transform;
/* The ramp is a single row; width equals the texel count. */
2172 channel->width = channel->bo->pitch / 4;
2173 channel->height = 1;
2174 channel->offset[0] = ox;
2175 channel->offset[1] = oy;
2176 channel->scale[0] = channel->scale[1] = 1;
/* Configure a channel as a linear gradient: push (dx, dy, -offset, 0) as
 * shader constants so the pixel shader can project each fragment onto the
 * gradient axis, then perform the shared gradient texture setup. */
2181 gen3_init_linear(struct sna *sna,
2183 struct sna_composite_op *op,
2184 struct sna_composite_channel *channel,
2187 PictLinearGradient *linear =
2188 (PictLinearGradient *)picture->pSourcePict;
2190 float dx, dy, offset;
2193 DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
2195 xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
2196 xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
/* Degenerate gradient (zero-length axis) cannot be programmed. */
2198 if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
2201 dx = xFixedToDouble(linear->p2.x - linear->p1.x);
2202 dy = xFixedToDouble(linear->p2.y - linear->p1.y);
2207 x0 = xFixedToDouble(linear->p1.x);
2208 y0 = xFixedToDouble(linear->p1.y);
2209 offset = dx*x0 + dy*y0;
2211 n = op->u.gen3.num_constants;
2212 channel->u.gen3.constants = FS_C0 + n / 4;
2213 op->u.gen3.constants[n++] = dx;
2214 op->u.gen3.constants[n++] = dy;
2215 op->u.gen3.constants[n++] = -offset;
2216 op->u.gen3.constants[n++] = 0;
2218 if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
2221 channel->u.gen3.type = SHADER_LINEAR;
2222 op->u.gen3.num_constants = n;
2224 DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
2225 __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0));
/* Configure a channel as a radial gradient. Concentric circles use the
 * simpler RADIAL_ONE form; the general case programs the coefficients of
 * the two-circle quadratic (RADIAL_TWO) as shader constants. */
2230 gen3_init_radial(struct sna *sna,
2232 struct sna_composite_op *op,
2233 struct sna_composite_channel *channel,
2236 PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
2237 double dx, dy, dr, r1;
2240 dx = xFixedToDouble(radial->c2.x - radial->c1.x);
2241 dy = xFixedToDouble(radial->c2.y - radial->c1.y);
2242 dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);
2244 r1 = xFixedToDouble(radial->c1.radius);
2246 n = op->u.gen3.num_constants;
2247 channel->u.gen3.constants = FS_C0 + n / 4;
2248 if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
/* Equal centres and radii: the gradient covers nothing. */
2249 if (radial->c2.radius == radial->c1.radius) {
2250 channel->u.gen3.type = SHADER_ZERO;
2254 op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
2255 op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
2256 op->u.gen3.constants[n++] = 1. / dr;
2257 op->u.gen3.constants[n++] = -r1 / dr;
2259 channel->u.gen3.mode = RADIAL_ONE;
2261 op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
2262 op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
2263 op->u.gen3.constants[n++] = r1;
2264 op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);
2266 op->u.gen3.constants[n++] = -2 * dx;
2267 op->u.gen3.constants[n++] = -2 * dy;
2268 op->u.gen3.constants[n++] = -2 * r1 * dr;
2269 op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
2271 channel->u.gen3.mode = RADIAL_TWO;
2274 if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
2277 channel->u.gen3.type = SHADER_RADIAL;
2278 op->u.gen3.num_constants = n;
/* Return true (with *color set) if the sampled region of the picture is a
 * drawable known to be uniformly cleared and the sample stays in bounds. */
2283 sna_picture_is_clear(PicturePtr picture,
2284 int x, int y, int w, int h,
2287 struct sna_pixmap *priv;
2289 if (!picture->pDrawable)
2292 priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
2293 if (priv == NULL || !priv->clear)
2296 if (!source_is_covered(picture, x, y, w, h))
2299 *color = priv->clear_color;
/* Prepare one composite channel from a Render picture: solid colours and
 * gradients become shader constants/ramps, drawable-backed pictures become
 * textures (falling back to fixup/convert/extract helpers when the format,
 * filter, repeat or size cannot be handled directly). */
2304 gen3_composite_picture(struct sna *sna,
2306 struct sna_composite_op *op,
2307 struct sna_composite_channel *channel,
2308 int16_t x, int16_t y,
2309 int16_t w, int16_t h,
2310 int16_t dst_x, int16_t dst_y,
2317 DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
2318 __FUNCTION__, x, y, w, h, dst_x, dst_y));
2320 channel->card_format = 0;
/* Source pictures without a drawable: solid fill or gradient. */
2322 if (picture->pDrawable == NULL) {
2323 SourcePict *source = picture->pSourcePict;
2326 switch (source->type) {
2327 case SourcePictTypeSolidFill:
2328 DBG(("%s: solid fill [%08x], format %x\n",
2329 __FUNCTION__, source->solidFill.color, picture->format));
2330 ret = gen3_init_solid(channel, source->solidFill.color);
2333 case SourcePictTypeLinear:
2334 ret = gen3_init_linear(sna, picture, op, channel,
2335 x - dst_x, y - dst_y);
2338 case SourcePictTypeRadial:
2339 ret = gen3_init_radial(sna, picture, op, channel,
2340 x - dst_x, y - dst_y);
/* Unsupported gradient types are rasterised on the CPU instead. */
2346 ret = sna_render_picture_approximate_gradient(sna, picture, channel,
2347 x, y, w, h, dst_x, dst_y);
2349 ret = sna_render_picture_fixup(sna, picture, channel,
2350 x, y, w, h, dst_x, dst_y);
2355 if (picture->alphaMap) {
2356 DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
2357 return sna_render_picture_fixup(sna, picture, channel,
2358 x, y, w, h, dst_x, dst_y);
2361 if (sna_picture_is_solid(picture, &color)) {
2362 DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
2363 return gen3_init_solid(channel, color);
2366 if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
2367 DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
2368 return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
2371 if (!gen3_check_repeat(picture))
2372 return sna_render_picture_fixup(sna, picture, channel,
2373 x, y, w, h, dst_x, dst_y);
2375 if (!gen3_check_filter(picture))
2376 return sna_render_picture_fixup(sna, picture, channel,
2377 x, y, w, h, dst_x, dst_y);
2379 channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
2380 channel->filter = picture->filter;
2381 channel->pict_format = picture->format;
2383 pixmap = get_drawable_pixmap(picture->pDrawable);
2384 get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
2386 x += dx + picture->pDrawable->x;
2387 y += dy + picture->pDrawable->y;
/* Integer translations are folded into the sample offset; nearest
 * filtering then suffices. */
2389 if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
2390 DBG(("%s: integer translation (%d, %d), removing\n",
2391 __FUNCTION__, dx, dy));
2394 channel->transform = NULL;
2395 channel->filter = PictFilterNearest;
2397 channel->transform = picture->transform;
2398 channel->is_affine = sna_transform_is_affine(picture->transform);
2401 if (!gen3_composite_channel_set_format(channel, picture->format) &&
2402 !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
2403 return sna_render_picture_convert(sna, picture, channel, pixmap,
2404 x, y, w, h, dst_x, dst_y);
/* Hardware texture size limits: extract just the sampled subregion. */
2406 if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
2407 DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
2409 pixmap->drawable.width, pixmap->drawable.height,
2411 return sna_render_picture_extract(sna, picture, channel,
2412 x, y, w, h, dst_x, dst_y);
2415 return sna_render_pixmap_bo(sna, channel, pixmap,
2416 x, y, w, h, dst_x, dst_y);
/* Heuristic: should this source be handled by the 2D BLT engine rather
 * than the 3D pipeline? True for solids, 1x1 drawables, oversized sources
 * and sources not resident on the GPU. */
2420 source_use_blt(struct sna *sna, PicturePtr picture)
2422 /* If it is a solid, try to use the BLT paths */
2423 if (!picture->pDrawable)
2424 return picture->pSourcePict->type == SourcePictTypeSolidFill;
2426 if (picture->pDrawable->width == 1 &&
2427 picture->pDrawable->height == 1 &&
2431 if (too_large(picture->pDrawable->width, picture->pDrawable->height))
2434 return !is_gpu(picture->pDrawable);
2438 try_blt(struct sna *sna,
2441 int width, int height)
2443 if (sna->kgem.mode != KGEM_RENDER) {
2444 DBG(("%s: already performing BLT\n", __FUNCTION__));
2448 if (too_large(width, height)) {
2449 DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
2450 __FUNCTION__, width, height));
2454 if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
2455 DBG(("%s: target too large for 3D pipe (%d, %d)\n",
2457 dst->pDrawable->width, dst->pDrawable->height));
2461 /* is the source picture only in cpu memory e.g. a shm pixmap? */
2462 return source_use_blt(sna, src);
/* Realign the vertex buffer write position when the vertex stride changes,
 * so vertex_index * floats_per_vertex == vertex_used holds for the new op. */
2466 gen3_align_vertex(struct sna *sna,
2467 const struct sna_composite_op *op)
2469 if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
2470 if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
2471 gen3_vertex_finish(sna);
2473 DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
2474 sna->render_state.gen3.last_floats_per_vertex,
2475 op->floats_per_vertex,
2476 sna->render.vertex_index,
2477 (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
2478 sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
2479 sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
2480 assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
2481 sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
/* Bind the destination picture as the render target. Enforces the gen3
 * single-stream minimum pitch (>= two elements wide) by re-allocating the
 * GPU bo when necessary, and records damage/delta bookkeeping. */
2486 gen3_composite_set_target(struct sna *sna,
2487 struct sna_composite_op *op,
2489 int x, int y, int w, int h)
2493 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
2494 op->dst.format = dst->format;
2495 op->dst.width = op->dst.pixmap->drawable.width;
2496 op->dst.height = op->dst.pixmap->drawable.height;
2504 sna_render_picture_extents(dst, &box);
2506 op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
2507 PREFER_GPU | FORCE_GPU | RENDER_GPU,
2509 if (op->dst.bo == NULL)
2512 /* For single-stream mode there should be no minimum alignment
2513 * required, except that the width must be at least 2 elements.
2515 if (op->dst.bo->pitch < 2*op->dst.pixmap->drawable.bitsPerPixel) {
2516 struct sna_pixmap *priv;
2518 priv = sna_pixmap_move_to_gpu (op->dst.pixmap,
2519 MOVE_READ | MOVE_WRITE);
2520 if (priv == NULL || priv->pinned)
/* Replace the bo with one of sufficient pitch, preserving contents. */
2523 if (priv->gpu_bo->pitch < 2*op->dst.pixmap->drawable.bitsPerPixel) {
2526 bo = kgem_replace_bo(&sna->kgem, priv->gpu_bo,
2527 op->dst.width, op->dst.height,
2528 2*op->dst.pixmap->drawable.bitsPerPixel,
2529 op->dst.pixmap->drawable.bitsPerPixel);
2533 kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
2537 op->dst.bo = priv->gpu_bo;
2538 op->damage = &priv->gpu_damage;
2539 if (sna_damage_is_all(op->damage,
2540 op->dst.width, op->dst.height))
2544 get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
2545 &op->dst.x, &op->dst.y);
2547 DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
2549 op->dst.pixmap, (int)op->dst.format,
2550 op->dst.width, op->dst.height,
2552 op->dst.x, op->dst.y,
2553 op->damage ? *op->damage : (void *)-1));
2555 assert(op->dst.bo->proxy == NULL);
/* 8-bit x 8-bit fixed-point multiply with rounding: (a*b)/255, the standard
 * exact byte-multiply identity used for premultiplied alpha. */
2559 static inline uint8_t
2560 mul_8_8(uint8_t a, uint8_t b)
2562 uint16_t t = a * (uint16_t)b + 0x7f;
2563 return ((t >> 8) + t) >> 8;
/* Scale one 8-bit channel of s (at bit offset `shift`) by the alpha byte
 * of m, returning it in place. */
2566 static inline uint32_t multa(uint32_t s, uint32_t m, int shift)
2568 return mul_8_8((s >> shift) & 0xff, m >> 24) << shift;
/* True when the shader type produces a constant colour per primitive
 * (no per-fragment texture lookup). Switch body is largely missing from
 * this extraction. */
2571 static inline bool is_constant_ps(uint32_t type)
2574 case SHADER_NONE: /* be warned! */
2578 case SHADER_CONSTANT:
/* Does the picture carry a separate alpha map? */
2586 has_alphamap(PicturePtr p)
2588 return p->alphaMap != NULL;
/* True if the picture has no transform, or only an integer translation. */
2592 untransformed(PicturePtr p)
2594 return !p->transform || pixman_transform_is_int_translate(p->transform);
/* Would this picture require uploading from unattached system memory? */
2598 need_upload(PicturePtr p)
2600 return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
/* Is the pixmap's backing bo currently busy on the GPU (or resident with
 * GPU-only damage), making a CPU fallback expensive? */
2604 source_is_busy(PixmapPtr pixmap)
2606 struct sna_pixmap *priv = sna_pixmap(pixmap);
2613 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
2616 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2619 return priv->gpu_damage && !priv->cpu_damage;
/* Would using this picture as a source force a software fallback?
 * (Unsupported format/repeat/filter, alpha map, or pending upload.) */
2623 source_fallback(PicturePtr p, PixmapPtr pixmap)
2625 if (sna_picture_is_solid(p, NULL))
2628 if (!gen3_check_xformat(p) || !gen3_check_repeat(p))
2631 if (pixmap && source_is_busy(pixmap))
2634 return has_alphamap(p) || !gen3_check_filter(p) || need_upload(p);
/* Global fallback decision for a composite request: returns true when the
 * whole operation is better done on the CPU. The rule of thumb is: if
 * anything is already on the GPU, keep it there; if the destination is on
 * the CPU and a source must fall back anyway, do everything in place. */
2638 gen3_composite_fallback(struct sna *sna,
2644 struct sna_pixmap *priv;
2645 PixmapPtr src_pixmap;
2646 PixmapPtr mask_pixmap;
2647 PixmapPtr dst_pixmap;
2648 bool src_fallback, mask_fallback;
2650 if (!gen3_check_dst_format(dst->format)) {
2651 DBG(("%s: unknown destination format: %d\n",
2652 __FUNCTION__, dst->format));
2656 dst_pixmap = get_drawable_pixmap(dst->pDrawable);
2658 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
2659 src_fallback = source_fallback(src, src_pixmap);
2662 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
2663 mask_fallback = source_fallback(mask, mask_pixmap);
2666 mask_fallback = false;
2669 /* If we are using the destination as a source and need to
2670 * readback in order to upload the source, do it all
2673 if (src_pixmap == dst_pixmap && src_fallback) {
2674 DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
2677 if (mask_pixmap == dst_pixmap && mask_fallback) {
2678 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
2683 mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
2685 gen3_blend_op[op].src_blend != BLENDFACT_ZERO)
2687 DBG(("%s: component-alpha mask with op=%d, should fallback\n",
2692 /* If anything is on the GPU, push everything out to the GPU */
2693 priv = sna_pixmap(dst_pixmap);
2694 if (priv && priv->gpu_damage && !priv->clear) {
2695 DBG(("%s: dst is already on the GPU, try to use GPU\n",
2700 if (src_pixmap && !src_fallback) {
2701 DBG(("%s: src is already on the GPU, try to use GPU\n",
2705 if (mask_pixmap && !mask_fallback) {
2706 DBG(("%s: mask is already on the GPU, try to use GPU\n",
2711 /* However if the dst is not on the GPU and we need to
2712 * render one of the sources using the CPU, we may
2713 * as well do the entire operation in place onthe CPU.
2716 DBG(("%s: dst is on the CPU and src will fallback\n",
2721 if (mask && mask_fallback) {
2722 DBG(("%s: dst is on the CPU and mask will fallback\n",
2727 if (too_large(dst_pixmap->drawable.width,
2728 dst_pixmap->drawable.height) &&
2729 (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) {
2730 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
2734 DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
/* If the mask samples the same drawable at the same offset with compatible
 * transform/alpha-map/repeat/filter/format, reuse the already-prepared
 * source channel for the mask (taking an extra bo reference). */
2740 reuse_source(struct sna *sna,
2741 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
2742 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
2744 if (src_x != msk_x || src_y != msk_y)
2750 kgem_bo_reference(mc->bo);
2754 if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable))
2760 DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
2762 if (!sna_transform_equal(src->transform, mask->transform))
2765 if (!sna_picture_alphamap_equal(src, mask))
2768 if (!gen3_check_repeat(mask))
2771 if (!gen3_check_filter(mask))
2774 if (!gen3_check_format(mask))
2777 DBG(("%s: reusing source channel for mask with a twist\n",
/* Same bo, but the mask's own repeat/filter/format still apply. */
2781 mc->repeat = gen3_texture_repeat(mask->repeat ? mask->repeatType : RepeatNone);
2782 mc->filter = gen3_filter(mask->filter);
2783 mc->pict_format = mask->format;
2784 gen3_composite_channel_set_format(mc, mask->format);
2786 kgem_bo_reference(mc->bo);
/*
 * Prepare a composite operation for the gen3 (i915-class) 3D pipeline.
 *
 * Tries the BLT engine first (to avoid a 3D->2D context switch), falls
 * back to tiling when the operation exceeds the pipeline limits, then
 * resolves src/mask Pictures into shader channels, collapses constant
 * source+mask pairs into a single constant, and picks a specialised
 * vertex emitter. On success *tmp is fully populated (blt/box/boxes/done
 * callbacks) and the composite state has been emitted to the batch.
 *
 * NOTE(review): this extract has dropped interleaved lines (returns,
 * case labels, cleanup labels) between the numbered lines — verify any
 * control-flow claim against the complete file.
 */
2791 gen3_render_composite(struct sna *sna,
2796 int16_t src_x, int16_t src_y,
2797 int16_t mask_x, int16_t mask_y,
2798 int16_t dst_x, int16_t dst_y,
2799 int16_t width, int16_t height,
2800 struct sna_composite_op *tmp)
2802 DBG(("%s()\n", __FUNCTION__));
/* Reject PictOps the gen3 blend table cannot express. */
2804 if (op >= ARRAY_SIZE(gen3_blend_op)) {
2805 DBG(("%s: fallback due to unhandled blend op: %d\n",
2810 /* Try to use the BLT engine unless it implies a
2811 * 3D -> 2D context switch.
2814 try_blt(sna, dst, src, width, height) &&
2815 sna_blt_composite(sna,
2823 if (gen3_composite_fallback(sna, op, src, mask, dst))
2826 if (need_tiling(sna, width, height))
2827 return sna_tiling_composite(op, src, mask, dst,
2834 if (!gen3_composite_set_target(sna, tmp, dst,
2835 dst_x, dst_y, width, height)) {
2836 DBG(("%s: unable to set render target\n",
2842 tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
/* Destination too big or too wide for 3D: render via a proxy bo. */
2843 if (too_large(tmp->dst.width, tmp->dst.height) ||
2844 !gen3_check_pitch_3d(tmp->dst.bo)) {
2845 if (!sna_render_composite_redirect(sna, tmp,
2846 dst_x, dst_y, width, height))
2850 tmp->u.gen3.num_constants = 0;
2851 tmp->src.u.gen3.type = SHADER_TEXTURE;
2852 tmp->src.is_affine = true;
2853 DBG(("%s: preparing source\n", __FUNCTION__));
2854 switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
2858 dst->polyMode == PolyModePrecise)) {
2862 tmp->src.u.gen3.type = SHADER_ZERO;
/* With no mask, a texture source may still be cheaper on the BLT. */
2865 if (mask == NULL && tmp->src.bo &&
2866 sna_blt_composite__convert(sna,
2867 dst_x, dst_y, width, height,
2871 gen3_composite_channel_convert(&tmp->src);
2874 DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));
2876 tmp->mask.u.gen3.type = SHADER_NONE;
2877 tmp->mask.is_affine = true;
2878 tmp->need_magic_ca_pass = false;
2879 tmp->has_component_alpha = false;
/* A zero source makes the mask irrelevant; otherwise resolve it,
 * reusing the source channel when src and mask are identical. */
2880 if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
2881 if (!reuse_source(sna,
2882 src, &tmp->src, src_x, src_y,
2883 mask, &tmp->mask, mask_x, mask_y)) {
2884 tmp->mask.u.gen3.type = SHADER_TEXTURE;
2885 DBG(("%s: preparing mask\n", __FUNCTION__));
2886 switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
2890 dst->polyMode == PolyModePrecise)) {
2894 tmp->mask.u.gen3.type = SHADER_ZERO;
2897 gen3_composite_channel_convert(&tmp->mask);
2901 DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
/* Zero mask annihilates the source: degrade to a zero/no-mask op. */
2902 if (tmp->mask.u.gen3.type == SHADER_ZERO) {
2904 kgem_bo_destroy(&sna->kgem,
2908 tmp->src.u.gen3.type = SHADER_ZERO;
2909 tmp->mask.u.gen3.type = SHADER_NONE;
2912 if (tmp->mask.u.gen3.type != SHADER_NONE) {
2913 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
2914 /* Check if it's component alpha that relies on a source alpha
2915 * and on the source value. We can only get one of those
2916 * into the single source value that we get to blend with.
2918 DBG(("%s: component-alpha mask: %d\n",
2919 __FUNCTION__, tmp->mask.u.gen3.type));
2920 tmp->has_component_alpha = true;
2921 if (tmp->mask.u.gen3.type == SHADER_WHITE) {
2922 tmp->mask.u.gen3.type = SHADER_NONE;
2923 tmp->has_component_alpha = false;
2924 } else if (gen3_blend_op[op].src_alpha &&
2925 (gen3_blend_op[op].src_blend != BLENDFACT_ZERO)) {
/* Component-alpha Over needs the two-pass "magic CA" trick:
 * first OutReverse, then an Add pass over the same vertices. */
2926 if (op != PictOpOver)
2929 tmp->need_magic_ca_pass = true;
2930 tmp->op = PictOpOutReverse;
2931 sna->render.vertex_start = sna->render.vertex_index;
2934 if (tmp->mask.is_opaque) {
2935 tmp->mask.u.gen3.type = SHADER_NONE;
2936 } else if (is_constant_ps(tmp->src.u.gen3.type) &&
2937 is_constant_ps(tmp->mask.u.gen3.type)) {
/* Both channels constant: premultiply per component into one
 * SHADER_CONSTANT colour and drop the mask entirely. */
2940 v = multa(tmp->src.u.gen3.mode,
2941 tmp->mask.u.gen3.mode,
2943 v |= multa(tmp->src.u.gen3.mode,
2944 tmp->mask.u.gen3.mode,
2946 v |= multa(tmp->src.u.gen3.mode,
2947 tmp->mask.u.gen3.mode,
2949 v |= multa(tmp->src.u.gen3.mode,
2950 tmp->mask.u.gen3.mode,
2953 DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
2955 tmp->src.u.gen3.mode,
2956 tmp->mask.u.gen3.mode,
2959 tmp->src.u.gen3.type = SHADER_CONSTANT;
2960 tmp->src.u.gen3.mode = v;
2961 tmp->src.is_opaque = false;
2963 tmp->mask.u.gen3.type = SHADER_NONE;
2968 DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
2969 tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
2970 tmp->src.is_affine, tmp->mask.is_affine));
/* Select the cheapest vertex emitter for the final channel combo;
 * gen3_emit_composite_primitive is the fully general fallback. */
2972 tmp->prim_emit = gen3_emit_composite_primitive;
2973 if (is_constant_ps(tmp->mask.u.gen3.type)) {
2974 switch (tmp->src.u.gen3.type) {
2979 case SHADER_CONSTANT:
2980 tmp->prim_emit = gen3_emit_composite_primitive_constant;
2984 if (tmp->src.transform == NULL)
2985 tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
2986 else if (tmp->src.is_affine)
2987 tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
2989 case SHADER_TEXTURE:
2990 if (tmp->src.transform == NULL) {
2991 if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0)
2992 tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
2994 tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
2995 } else if (tmp->src.is_affine)
2996 tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
2999 } else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
3000 if (tmp->mask.transform == NULL) {
3001 if (is_constant_ps(tmp->src.u.gen3.type)) {
3002 if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0)
3003 tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
3005 tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
3006 } else if (tmp->src.transform == NULL)
3007 tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
3008 else if (tmp->src.is_affine)
3009 tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
/* Vertex layout: 2 floats position, +2 (affine) or +4 (projective)
 * texcoords per non-constant channel; 3 vertices per rectangle. */
3013 tmp->floats_per_vertex = 2;
3014 if (!is_constant_ps(tmp->src.u.gen3.type))
3015 tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
3016 if (!is_constant_ps(tmp->mask.u.gen3.type))
3017 tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
3018 DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
3019 !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
3020 !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
3021 tmp->floats_per_vertex,
3022 tmp->prim_emit != gen3_emit_composite_primitive));
3023 tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
3025 tmp->blt = gen3_render_composite_blt;
3026 tmp->box = gen3_render_composite_box;
3027 tmp->boxes = gen3_render_composite_boxes;
3028 tmp->done = gen3_render_composite_done;
/* Make sure all three bos fit in this batch; flush and retry once. */
3030 if (!kgem_check_bo(&sna->kgem,
3031 tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
3033 kgem_submit(&sna->kgem);
3034 if (!kgem_check_bo(&sna->kgem,
3035 tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
3040 gen3_emit_composite_state(sna, tmp);
3041 gen3_align_vertex(sna, tmp);
/* Error-path cleanup: release whichever bos were acquired above. */
3046 kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
3049 kgem_bo_destroy(&sna->kgem, tmp->src.bo);
3051 if (tmp->redirect.real_bo)
3052 kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
/* Emit one span vertex: device-space position, the source texcoord,
 * then the per-span opacity as an extra float channel. */
3057 gen3_emit_composite_spans_vertex(struct sna *sna,
3058 const struct sna_composite_spans_op *op,
3059 int16_t x, int16_t y,
3062 gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
3063 gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
3064 OUT_VERTEX(opacity);
3067 fastcall static void
3068 gen3_emit_composite_spans_primitive_zero(struct sna *sna,
3069 const struct sna_composite_spans_op *op,
3073 float *v = sna->render.vertices + sna->render.vertex_used;
3074 sna->render.vertex_used += 6;
3076 v[0] = op->base.dst.x + box->x2;
3077 v[1] = op->base.dst.y + box->y2;
3079 v[2] = op->base.dst.x + box->x1;
3083 v[5] = op->base.dst.x + box->y1;
/* SHADER_ZERO span rectangle when dst.x/y are both 0: positions can be
 * taken straight from the box (2 floats per vertex, opacity unused). */
3086 fastcall static void
3087 gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
3088 const struct sna_composite_spans_op *op,
3092 float *v = sna->render.vertices + sna->render.vertex_used;
3093 sna->render.vertex_used += 6;
/* Shared coordinates between the three corners: v[1]/v[3] share y2,
 * v[2]/v[4] share x1. */
3096 v[3] = v[1] = box->y2;
3097 v[4] = v[2] = box->x1;
/* Constant-colour span rectangle: 3 vertices x (x, y, opacity) =
 * 9 floats; the opacity rides in the third channel of every vertex. */
3101 fastcall static void
3102 gen3_emit_composite_spans_primitive_constant(struct sna *sna,
3103 const struct sna_composite_spans_op *op,
3107 float *v = sna->render.vertices + sna->render.vertex_used;
3108 sna->render.vertex_used += 9;
3110 v[0] = op->base.dst.x + box->x2;
3111 v[6] = v[3] = op->base.dst.x + box->x1;
3112 v[4] = v[1] = op->base.dst.y + box->y2;
3113 v[7] = op->base.dst.y + box->y1;
3114 v[8] = v[5] = v[2] = opacity;
/* As ..._primitive_constant but with dst.x/y known to be 0, so box
 * coordinates are used directly. */
3117 fastcall static void
3118 gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
3119 const struct sna_composite_spans_op *op,
3123 float *v = sna->render.vertices + sna->render.vertex_used;
3124 sna->render.vertex_used += 9;
3127 v[6] = v[3] = box->x1;
3128 v[4] = v[1] = box->y2;
3130 v[8] = v[5] = v[2] = opacity;
/* Untransformed texture source: 3 vertices x (x, y, s, t, opacity) =
 * 15 floats; texcoords are channel offset plus box corner, normalized
 * by the channel scale. */
3133 fastcall static void
3134 gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
3135 const struct sna_composite_spans_op *op,
3139 float *v = sna->render.vertices + sna->render.vertex_used;
3140 sna->render.vertex_used += 15;
3142 v[0] = op->base.dst.x + box->x2;
3143 v[1] = op->base.dst.y + box->y2;
3144 v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
3145 v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
3148 v[5] = op->base.dst.x + box->x1;
3150 v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
3155 v[11] = op->base.dst.y + box->y1;
3157 v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
/* Affine-transformed texture source: positions as usual, but each
 * corner's texcoord is mapped through the source transform before
 * normalization (15 floats per rectangle). */
3161 fastcall static void
3162 gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
3163 const struct sna_composite_spans_op *op,
3167 PictTransform *transform = op->base.src.transform;
3170 v = sna->render.vertices + sna->render.vertex_used;
3171 sna->render.vertex_used += 15;
3173 v[0] = op->base.dst.x + box->x2;
3174 v[6] = v[1] = op->base.dst.y + box->y2;
3175 v[10] = v[5] = op->base.dst.x + box->x1;
3176 v[11] = op->base.dst.y + box->y1;
/* Corner (x2, y2) */
3181 _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
3182 (int)op->base.src.offset[1] + box->y2,
3185 v[2] = x * op->base.src.scale[0];
3186 v[3] = y * op->base.src.scale[1];
/* Corner (x1, y2) */
3188 _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
3189 (int)op->base.src.offset[1] + box->y2,
3192 v[7] = x * op->base.src.scale[0];
3193 v[8] = y * op->base.src.scale[1];
/* Corner (x1, y1) */
3195 _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
3196 (int)op->base.src.offset[1] + box->y1,
3199 v[12] = x * op->base.src.scale[0];
3200 v[13] = y * op->base.src.scale[1];
/* Untransformed gradient source: like identity_source but gradient
 * texcoords are unnormalized (no scale applied); 15 floats per rect. */
3203 fastcall static void
3204 gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
3205 const struct sna_composite_spans_op *op,
3209 float *v = sna->render.vertices + sna->render.vertex_used;
3210 sna->render.vertex_used += 15;
3212 v[0] = op->base.dst.x + box->x2;
3213 v[1] = op->base.dst.y + box->y2;
3214 v[2] = op->base.src.offset[0] + box->x2;
3215 v[3] = op->base.src.offset[1] + box->y2;
3218 v[5] = op->base.dst.x + box->x1;
3220 v[7] = op->base.src.offset[0] + box->x1;
3225 v[11] = op->base.dst.y + box->y1;
3227 v[13] = op->base.src.offset[1] + box->y1;
/* Affine-transformed gradient source: each corner's coordinate is run
 * through the source transform; gradient coords are not normalized. */
3231 fastcall static void
3232 gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
3233 const struct sna_composite_spans_op *op,
3237 PictTransform *transform = op->base.src.transform;
3238 float *v = sna->render.vertices + sna->render.vertex_used;
3239 sna->render.vertex_used += 15;
3241 v[0] = op->base.dst.x + box->x2;
3242 v[1] = op->base.dst.y + box->y2;
3243 _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
3244 (int)op->base.src.offset[1] + box->y2,
3249 v[5] = op->base.dst.x + box->x1;
3251 _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
3252 (int)op->base.src.offset[1] + box->y2,
3258 v[11] = op->base.dst.y + box->y1;
3259 _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
3260 (int)op->base.src.offset[1] + box->y1,
/* Generic span emitter: one gen3_emit_composite_spans_vertex per corner.
 * Used when no specialised emitter matches the channel setup. */
3266 fastcall static void
3267 gen3_emit_composite_spans_primitive(struct sna *sna,
3268 const struct sna_composite_spans_op *op,
3272 gen3_emit_composite_spans_vertex(sna, op,
3275 gen3_emit_composite_spans_vertex(sna, op,
3278 gen3_emit_composite_spans_vertex(sna, op,
/* Fast-path box callback for a constant source with no dst offset:
 * reserves one rectangle and writes the 9 floats inline rather than
 * going through op->prim_emit. */
3283 fastcall static void
3284 gen3_render_composite_spans_constant_box(struct sna *sna,
3285 const struct sna_composite_spans_op *op,
3286 const BoxRec *box, float opacity)
3289 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
3291 op->base.src.offset[0], op->base.src.offset[1],
3293 op->base.dst.x, op->base.dst.y,
3296 box->y2 - box->y1));
3298 gen3_get_rectangles(sna, &op->base, 1);
3300 v = sna->render.vertices + sna->render.vertex_used;
3301 sna->render.vertex_used += 9;
3304 v[6] = v[3] = box->x1;
3305 v[4] = v[1] = box->y2;
3307 v[8] = v[5] = v[2] = opacity;
/* Generic box callback: reserve space for one rectangle and delegate
 * vertex emission to the operation's chosen prim_emit. */
3310 fastcall static void
3311 gen3_render_composite_spans_box(struct sna *sna,
3312 const struct sna_composite_spans_op *op,
3313 const BoxRec *box, float opacity)
3315 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
3317 op->base.src.offset[0], op->base.src.offset[1],
3319 op->base.dst.x, op->base.dst.y,
3322 box->y2 - box->y1));
3324 gen3_get_rectangles(sna, &op->base, 1);
3325 op->prim_emit(sna, op, box, opacity);
/* Batched box callback: emit as many rectangles as fit per ring
 * submission, looping until all nbox boxes are drawn with the same
 * opacity. */
3329 gen3_render_composite_spans_boxes(struct sna *sna,
3330 const struct sna_composite_spans_op *op,
3331 const BoxRec *box, int nbox,
3334 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
3336 op->base.src.offset[0], op->base.src.offset[1],
3338 op->base.dst.x, op->base.dst.y));
/* gen3_get_rectangles may return fewer than requested when the
 * vertex buffer is nearly full; consume what we were granted. */
3343 nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
3344 nbox -= nbox_this_time;
3347 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
3350 box->y2 - box->y1));
3352 op->prim_emit(sna, op, box++, opacity);
3353 } while (--nbox_this_time);
/* Finish a spans operation: flush any pending vertices, release the
 * source bo, and unwind a redirected (proxy) render target. */
3357 fastcall static void
3358 gen3_render_composite_spans_done(struct sna *sna,
3359 const struct sna_composite_spans_op *op)
3361 if (sna->render_state.gen3.vertex_offset)
3362 gen3_vertex_flush(sna);
3364 DBG(("%s()\n", __FUNCTION__));
3366 if (op->base.src.bo)
3367 kgem_bo_destroy(&sna->kgem, op->base.src.bo);
3369 sna_render_composite_redirect_done(sna, &op->base);
/* Cheap feasibility check used before committing to the spans path:
 * rejects unsupported blend ops, known-fallback picture combinations,
 * and tiled operations whose destination is not resident on the GPU.
 * NOTE(review): the return statements between these lines were lost in
 * extraction — confirm the polarity of each test against the full file. */
3373 gen3_check_composite_spans(struct sna *sna,
3374 uint8_t op, PicturePtr src, PicturePtr dst,
3375 int16_t width, int16_t height, unsigned flags)
3377 if (op >= ARRAY_SIZE(gen3_blend_op))
3380 if (gen3_composite_fallback(sna, op, src, NULL, dst))
3383 if (need_tiling(sna, width, height)) {
3384 if (!is_gpu(dst->pDrawable)) {
3385 DBG(("%s: fallback, tiled operation not on GPU\n",
/*
 * Prepare a composite-spans operation (composite with a per-box opacity
 * channel, used by the trapezoid rasteriser). Mirrors
 * gen3_render_composite but the mask is the synthetic SHADER_OPACITY
 * channel instead of a Picture.
 *
 * NOTE(review): extraction gaps — returns/case labels between the
 * numbered lines are missing; verify control flow against the full file.
 */
3395 gen3_render_composite_spans(struct sna *sna,
3399 int16_t src_x, int16_t src_y,
3400 int16_t dst_x, int16_t dst_y,
3401 int16_t width, int16_t height,
3403 struct sna_composite_spans_op *tmp)
3407 DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
3408 src_x, src_y, dst_x, dst_y, width, height));
3410 assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags));
3412 if (need_tiling(sna, width, height)) {
3413 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
3414 __FUNCTION__, width, height));
3415 return sna_tiling_composite_spans(op, src, dst,
3416 src_x, src_y, dst_x, dst_y,
3417 width, height, flags, tmp);
3420 if (!gen3_composite_set_target(sna, &tmp->base, dst,
3421 dst_x, dst_y, width, height)) {
3422 DBG(("%s: unable to set render target\n",
3428 tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
/* Oversized/over-pitched target: render via a temporary proxy bo. */
3429 if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
3430 !gen3_check_pitch_3d(tmp->base.dst.bo)) {
3431 if (!sna_render_composite_redirect(sna, &tmp->base,
3432 dst_x, dst_y, width, height))
3436 tmp->base.src.u.gen3.type = SHADER_TEXTURE;
3437 tmp->base.src.is_affine = true;
3438 DBG(("%s: preparing source\n", __FUNCTION__));
3439 switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
3443 dst->polyMode == PolyModePrecise)) {
3447 tmp->base.src.u.gen3.type = SHADER_ZERO;
3450 gen3_composite_channel_convert(&tmp->base.src);
3453 DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type));
/* The opacity channel stands in for the mask (unless src is zero). */
3455 if (tmp->base.src.u.gen3.type != SHADER_ZERO)
3456 tmp->base.mask.u.gen3.type = SHADER_OPACITY;
3458 no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
3459 tmp->box = gen3_render_composite_spans_box;
3460 tmp->boxes = gen3_render_composite_spans_boxes;
3461 tmp->done = gen3_render_composite_spans_done;
/* Pick the cheapest emitter for the resolved source type. */
3462 tmp->prim_emit = gen3_emit_composite_spans_primitive;
3463 switch (tmp->base.src.u.gen3.type) {
3467 tmp->prim_emit = no_offset ? gen3_emit_composite_spans_primitive_zero_no_offset : gen3_emit_composite_spans_primitive_zero;
3471 case SHADER_CONSTANT:
3473 tmp->box = gen3_render_composite_spans_constant_box;
3474 tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
3476 tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
3480 if (tmp->base.src.transform == NULL)
3481 tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
3482 else if (tmp->base.src.is_affine)
3483 tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
3485 case SHADER_TEXTURE:
3486 if (tmp->base.src.transform == NULL)
3487 tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
3488 else if (tmp->base.src.is_affine)
3489 tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
3493 tmp->base.mask.bo = NULL;
/* 2 floats position, +2/+3 for a non-constant source (affine vs not),
 * +1 float for the opacity channel when present. */
3495 tmp->base.floats_per_vertex = 2;
3496 if (!is_constant_ps(tmp->base.src.u.gen3.type))
3497 tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
3498 tmp->base.floats_per_vertex +=
3499 tmp->base.mask.u.gen3.type == SHADER_OPACITY;
3500 tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
/* Reserve batch space for dst+src; flush once and retry on failure. */
3502 if (!kgem_check_bo(&sna->kgem,
3503 tmp->base.dst.bo, tmp->base.src.bo,
3505 kgem_submit(&sna->kgem);
3506 if (!kgem_check_bo(&sna->kgem,
3507 tmp->base.dst.bo, tmp->base.src.bo,
3512 gen3_emit_composite_state(sna, &tmp->base);
3513 gen3_align_vertex(sna, &tmp->base);
/* Error-path cleanup. */
3517 if (tmp->base.src.bo)
3518 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
3520 if (tmp->base.redirect.real_bo)
3521 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
/*
 * Emit the full 3D state for a video blit: render target, immediate
 * state, shader constants, sampler and map (texture) state, and the
 * pixel shader itself. Packed YUV (single 422 texture with the
 * hardware's colourspace conversion) and planar YUV (three Y8 textures
 * combined by a hand-written YUV->RGB shader) take separate paths.
 * Redundant constant/sampler packets are rewound when they match the
 * previously emitted ones.
 *
 * NOTE(review): extraction gaps — some packet words and the else arms
 * between the numbered lines are missing; batch packet lengths must be
 * verified against the complete file before editing.
 */
3526 gen3_emit_video_state(struct sna *sna,
3527 struct sna_video *video,
3528 struct sna_video_frame *frame,
3530 struct kgem_bo *dst_bo,
3531 int width, int height)
3533 struct gen3_render_state *state = &sna->render_state.gen3;
3534 uint32_t id, ms3, rewind;
3536 gen3_emit_target(sna, dst_bo, width, height,
3537 sna_format_for_depth(pixmap->drawable.depth));
3539 /* XXX share with composite? Is it worth the effort? */
/* Bit 31 of last_shader flags "video state already loaded". */
3540 if ((state->last_shader & (1<<31)) == 0) {
3541 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
3542 I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
3544 OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
/* One 2D texcoord set; the remaining seven are unused. */
3545 OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
3546 S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
3547 S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
3548 S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
3549 S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
3550 S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
3551 S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
3552 S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
3553 OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
3554 (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
3555 S6_COLOR_WRITE_ENABLE);
3557 state->last_blend = 0;
3558 state->floats_per_vertex = 4;
/* ---- Packed (non-planar) fourcc path ---- */
3561 if (!is_planar_fourcc(frame->id)) {
3562 rewind = sna->kgem.nbatch;
3563 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
3564 OUT_BATCH(0x0000001); /* constant 0 */
3565 /* constant 0: brightness/contrast */
3566 OUT_BATCH_F(video->brightness / 128.0);
3567 OUT_BATCH_F(video->contrast / 255.0);
/* Skip the packet if identical to the last constants emitted. */
3570 if (state->last_constants &&
3571 memcmp(&sna->kgem.batch[state->last_constants],
3572 &sna->kgem.batch[rewind],
3573 6*sizeof(uint32_t)) == 0)
3574 sna->kgem.nbatch = rewind;
3576 state->last_constants = rewind;
3578 rewind = sna->kgem.nbatch;
3579 OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
3580 OUT_BATCH(0x00000001);
/* Sampler 0: bilinear, edge clamp, hardware YUV conversion. */
3581 OUT_BATCH(SS2_COLORSPACE_CONVERSION |
3582 (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
3583 (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
3584 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
3585 SS3_TCX_ADDR_MODE_SHIFT) |
3586 (TEXCOORDMODE_CLAMP_EDGE <<
3587 SS3_TCY_ADDR_MODE_SHIFT) |
3588 (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
3589 SS3_NORMALIZED_COORDS);
3590 OUT_BATCH(0x00000000);
3591 if (state->last_sampler &&
3592 memcmp(&sna->kgem.batch[state->last_sampler],
3593 &sna->kgem.batch[rewind],
3594 5*sizeof(uint32_t)) == 0)
3595 sna->kgem.nbatch = rewind;
3597 state->last_sampler = rewind;
3599 OUT_BATCH(_3DSTATE_MAP_STATE | 3);
3600 OUT_BATCH(0x00000001); /* texture map #1 */
3601 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
3603 I915_GEM_DOMAIN_SAMPLER << 16,
/* Packed 422 surface format depends on the fourcc byte order. */
3607 switch (frame->id) {
3609 ms3 |= MT_422_YCRCB_NORMAL;
3612 ms3 |= MT_422_YCRCB_SWAPY;
3615 ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
3616 ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
3618 OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
/* Shader id encodes: video(31) | packed-variant | brightness used. */
3620 id = 1<<31 | 1<<1 | !!video->brightness;
3621 if (state->last_shader != id) {
3622 state->last_shader = id;
/* Reserve the program header; patched with the length below. */
3623 id = sna->kgem.nbatch++;
3627 gen3_fs_texld(FS_OC, FS_S0, FS_T0);
3628 if (video->brightness != 0) {
3630 gen3_fs_operand_reg(FS_OC),
3631 gen3_fs_operand(FS_C0, X, X, X, ZERO));
3634 sna->kgem.batch[id] =
3635 _3DSTATE_PIXEL_SHADER_PROGRAM |
3636 (sna->kgem.nbatch - id - 2);
3639 /* For the planar formats, we set up three samplers --
3640 * one for each plane, in a Y8 format. Because I
3641 * couldn't get the special PLANAR_TO_PACKED
3642 * shader setup to work, I did the manual pixel shader:
3648 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
3649 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
3650 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
3652 * register assignment:
3659 rewind = sna->kgem.nbatch;
3660 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
3661 OUT_BATCH(0x000001f); /* constants 0-4 */
3662 /* constant 0: normalization offsets */
3663 OUT_BATCH_F(-0.0625);
3667 /* constant 1: r coefficients */
3668 OUT_BATCH_F(1.1643);
3670 OUT_BATCH_F(1.5958);
3672 /* constant 2: g coefficients */
3673 OUT_BATCH_F(1.1643);
3674 OUT_BATCH_F(-0.39173);
3675 OUT_BATCH_F(-0.81290);
3677 /* constant 3: b coefficients */
3678 OUT_BATCH_F(1.1643);
3682 /* constant 4: brightness/contrast */
3683 OUT_BATCH_F(video->brightness / 128.0);
3684 OUT_BATCH_F(video->contrast / 255.0);
3687 if (state->last_constants &&
3688 memcmp(&sna->kgem.batch[state->last_constants],
3689 &sna->kgem.batch[rewind],
3690 22*sizeof(uint32_t)) == 0)
3691 sna->kgem.nbatch = rewind;
3693 state->last_constants = rewind;
/* Three samplers (Y, U, V), all bilinear with edge clamping. */
3695 rewind = sna->kgem.nbatch;
3696 OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
3697 OUT_BATCH(0x00000007);
3699 OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
3700 (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
3701 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
3702 SS3_TCX_ADDR_MODE_SHIFT) |
3703 (TEXCOORDMODE_CLAMP_EDGE <<
3704 SS3_TCY_ADDR_MODE_SHIFT) |
3705 (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
3706 SS3_NORMALIZED_COORDS);
3707 OUT_BATCH(0x00000000);
3709 OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
3710 (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
3711 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
3712 SS3_TCX_ADDR_MODE_SHIFT) |
3713 (TEXCOORDMODE_CLAMP_EDGE <<
3714 SS3_TCY_ADDR_MODE_SHIFT) |
3715 (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
3716 SS3_NORMALIZED_COORDS);
3717 OUT_BATCH(0x00000000);
3719 OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
3720 (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
3721 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
3722 SS3_TCX_ADDR_MODE_SHIFT) |
3723 (TEXCOORDMODE_CLAMP_EDGE <<
3724 SS3_TCY_ADDR_MODE_SHIFT) |
3725 (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
3726 SS3_NORMALIZED_COORDS);
3727 OUT_BATCH(0x00000000);
3728 if (state->last_sampler &&
3729 memcmp(&sna->kgem.batch[state->last_sampler],
3730 &sna->kgem.batch[rewind],
3731 11*sizeof(uint32_t)) == 0)
3732 sna->kgem.nbatch = rewind;
3734 state->last_sampler = rewind;
/* Three maps: full-resolution Y plane, half-resolution U and V. */
3736 OUT_BATCH(_3DSTATE_MAP_STATE | 9);
3737 OUT_BATCH(0x00000007);
3739 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
3741 I915_GEM_DOMAIN_SAMPLER << 16,
3744 ms3 = MAPSURF_8BIT | MT_8BIT_I8;
3745 ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
3746 ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
3748 /* check to see if Y has special pitch than normal
3749 * double u/v pitch, e.g i915 XvMC hw requires at
3750 * least 1K alignment, so Y pitch might
3751 * be same as U/V's.*/
3752 if (frame->pitch[1])
3753 OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
3755 OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
3757 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
3759 I915_GEM_DOMAIN_SAMPLER << 16,
3760 frame->UBufOffset));
3762 ms3 = MAPSURF_8BIT | MT_8BIT_I8;
3763 ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
3764 ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
3766 OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
3768 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
3770 I915_GEM_DOMAIN_SAMPLER << 16,
3771 frame->VBufOffset));
3773 ms3 = MAPSURF_8BIT | MT_8BIT_I8;
3774 ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
3775 ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
3777 OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
/* Shader id: video(31) | planar-variant | brightness used. */
3779 id = 1<<31 | 2<<1 | !!video->brightness;
3780 if (state->last_shader != id) {
3781 state->last_shader = id;
3782 id = sna->kgem.nbatch++;
3784 /* Declare samplers */
3785 gen3_fs_dcl(FS_S0); /* Y */
3786 gen3_fs_dcl(FS_S1); /* U */
3787 gen3_fs_dcl(FS_S2); /* V */
3788 gen3_fs_dcl(FS_T0); /* normalized coords */
3790 /* Load samplers to temporaries. */
3791 gen3_fs_texld(FS_R1, FS_S0, FS_T0);
3792 gen3_fs_texld(FS_R2, FS_S1, FS_T0);
3793 gen3_fs_texld(FS_R3, FS_S2, FS_T0);
3795 /* Move the sampled YUV data in R[123] to the first
3798 gen3_fs_mov_masked(FS_R0, MASK_X,
3799 gen3_fs_operand_reg(FS_R1));
3800 gen3_fs_mov_masked(FS_R0, MASK_Y,
3801 gen3_fs_operand_reg(FS_R2));
3802 gen3_fs_mov_masked(FS_R0, MASK_Z,
3803 gen3_fs_operand_reg(FS_R3));
3805 /* Normalize the YUV data */
3806 gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
3807 gen3_fs_operand_reg(FS_C0));
3808 /* dot-product the YUV data in R0 by the vectors of
3809 * coefficients for calculating R, G, and B, storing
3810 * the results in the R, G, or B channels of the output
3811 * color. The OC results are implicitly clamped
3812 * at the end of the program.
3814 gen3_fs_dp3(FS_OC, MASK_X,
3815 gen3_fs_operand_reg(FS_R0),
3816 gen3_fs_operand_reg(FS_C1));
3817 gen3_fs_dp3(FS_OC, MASK_Y,
3818 gen3_fs_operand_reg(FS_R0),
3819 gen3_fs_operand_reg(FS_C2));
3820 gen3_fs_dp3(FS_OC, MASK_Z,
3821 gen3_fs_operand_reg(FS_R0),
3822 gen3_fs_operand_reg(FS_C3));
3823 /* Set alpha of the output to 1.0, by wiring W to 1
3824 * and not actually using the source.
3826 gen3_fs_mov_masked(FS_OC, MASK_W,
3827 gen3_fs_operand_one());
3829 if (video->brightness != 0) {
3831 gen3_fs_operand_reg(FS_OC),
3832 gen3_fs_operand(FS_C4, X, X, X, ZERO));
/* Patch the reserved header with the final program length. */
3835 sna->kgem.batch[id] =
3836 _3DSTATE_PIXEL_SHADER_PROGRAM |
3837 (sna->kgem.nbatch - id - 2);
3844 gen3_video_get_batch(struct sna *sna)
3846 kgem_set_mode(&sna->kgem, KGEM_RENDER);
3848 if (!kgem_check_batch(&sna->kgem, 120) ||
3849 !kgem_check_reloc(&sna->kgem, 4) ||
3850 !kgem_check_exec(&sna->kgem, 2)) {
3851 _kgem_submit(&sna->kgem);
3852 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
3855 if (sna->render_state.gen3.need_invariant)
3856 gen3_emit_invariant(sna);
/* Clamp the number of rectangles that fit inline in the batch:
 * 3 vertices per rectangle, one dword reserved for the primitive
 * header. NOTE(review): the clamping/return lines were dropped by
 * extraction. */
3860 gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
3862 int size = floats_per_vertex * 3;
3863 int rem = batch_space(sna) - 1;
3865 if (size * want > rem)
3872 gen3_render_video(struct sna *sna,
3873 struct sna_video *video,
3874 struct sna_video_frame *frame,
3875 RegionPtr dstRegion,
3876 short src_w, short src_h,
3877 short drw_w, short drw_h,
3880 struct sna_pixmap *priv = sna_pixmap(pixmap);
3881 BoxPtr pbox = REGION_RECTS(dstRegion);
3882 int nbox = REGION_NUM_RECTS(dstRegion);
3883 int dxo = dstRegion->extents.x1;
3884 int dyo = dstRegion->extents.y1;
3885 int width = dstRegion->extents.x2 - dxo;
3886 int height = dstRegion->extents.y2 - dyo;
3887 float src_scale_x, src_scale_y;
3888 int pix_xoff, pix_yoff;
3889 struct kgem_bo *dst_bo;
3892 DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h));
3894 dst_bo = priv->gpu_bo;
3898 if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
3899 !gen3_check_pitch_3d(dst_bo)) {
3900 int bpp = pixmap->drawable.bitsPerPixel;
3902 dst_bo = kgem_create_2d(&sna->kgem,
3904 kgem_choose_tiling(&sna->kgem,
3906 width, height, bpp),
3915 width = pixmap->drawable.width;
3916 height = pixmap->drawable.height;
3918 /* Set up the offset for translating from the given region
3919 * (in screen coordinates) to the backing pixmap.
3922 pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
3923 pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
3930 src_scale_x = ((float)src_w / frame->width) / drw_w;
3931 src_scale_y = ((float)src_h / frame->height) / drw_h;
3933 DBG(("%s: src offset=(%d, %d), scale=(%f, %f), dst offset=(%d, %d)\n",
3935 dxo, dyo, src_scale_x, src_scale_y, pix_xoff, pix_yoff));
3937 gen3_video_get_batch(sna);
3938 gen3_emit_video_state(sna, video, frame, pixmap,
3939 dst_bo, width, height);
3941 int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
3942 if (nbox_this_time == 0) {
3943 gen3_video_get_batch(sna);
3944 gen3_emit_video_state(sna, video, frame, pixmap,
3945 dst_bo, width, height);
3946 nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
3948 nbox -= nbox_this_time;
3950 OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
3951 while (nbox_this_time--) {
3952 int box_x1 = pbox->x1;
3953 int box_y1 = pbox->y1;
3954 int box_x2 = pbox->x2;
3955 int box_y2 = pbox->y2;
3959 DBG(("%s: box (%d, %d), (%d, %d)\n",
3960 __FUNCTION__, box_x1, box_y1, box_x2, box_y2));
3963 OUT_BATCH_F(box_x2 + pix_xoff);
3964 OUT_BATCH_F(box_y2 + pix_yoff);
3965 OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
3966 OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
3969 OUT_BATCH_F(box_x1 + pix_xoff);
3970 OUT_BATCH_F(box_y2 + pix_yoff);
3971 OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
3972 OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
3975 OUT_BATCH_F(box_x1 + pix_xoff);
3976 OUT_BATCH_F(box_y1 + pix_yoff);
3977 OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
3978 OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
3984 pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
3985 pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
3990 sna_blt_copy_boxes(sna, GXcopy,
3992 priv->gpu_bo, pix_xoff, pix_yoff,
3993 pixmap->drawable.bitsPerPixel,
3994 REGION_RECTS(dstRegion),
3995 REGION_NUM_RECTS(dstRegion));
3997 kgem_bo_destroy(&sna->kgem, dst_bo);
4000 if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
4001 if ((pix_xoff | pix_yoff) == 0) {
4002 sna_damage_add(&priv->gpu_damage, dstRegion);
4003 sna_damage_subtract(&priv->cpu_damage, dstRegion);
4005 sna_damage_add_boxes(&priv->gpu_damage,
4006 REGION_RECTS(dstRegion),
4007 REGION_NUM_RECTS(dstRegion),
4008 pix_xoff, pix_yoff);
4009 sna_damage_subtract_boxes(&priv->cpu_damage,
4010 REGION_RECTS(dstRegion),
4011 REGION_NUM_RECTS(dstRegion),
4012 pix_xoff, pix_yoff);
4015 priv->clear = false;
/* Initialise a composite channel to sample the whole source pixmap
 * 1:1: nearest filtering, no repeat, identity (affine) transform, with
 * texcoords normalized by the pixmap dimensions. Used by the 3D copy
 * paths. */
4021 gen3_render_copy_setup_source(struct sna_composite_channel *channel,
4025 channel->u.gen3.type = SHADER_TEXTURE;
4026 channel->filter = gen3_filter(PictFilterNearest);
4027 channel->repeat = gen3_texture_repeat(RepeatNone);
4028 channel->width = pixmap->drawable.width;
4029 channel->height = pixmap->drawable.height;
4030 channel->scale[0] = 1.f/pixmap->drawable.width;
4031 channel->scale[1] = 1.f/pixmap->drawable.height;
4032 channel->offset[0] = 0;
4033 channel->offset[1] = 0;
4034 gen3_composite_channel_set_format(channel,
4035 sna_format_for_depth(pixmap->drawable.depth));
4037 channel->is_affine = 1;
/* Copy an array of boxes from src to dst. Prefers the BLT engine where
 * possible and only falls back to the gen3 3D pipeline (as a textured
 * PictOpSrc/PictOpClear composite) when the BLT cannot handle the bos;
 * tiles or redirects when the destination exceeds the 3D limits. */
4041 gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
4042 PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
4043 PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
4044 const BoxRec *box, int n, unsigned flags)
4046 struct sna_composite_op tmp;
/* Mismatched depths cannot be expressed as a 3D copy; hand straight to the BLT. */
4049 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
4052 return sna_blt_copy_boxes(sna, alu,
4053 src_bo, src_dx, src_dy,
4054 dst_bo, dst_dx, dst_dy,
4055 dst->drawable.bitsPerPixel,
4059 DBG(("%s (%d, %d)->(%d, %d) x %d\n",
4060 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
/* First attempt: depths match, try the BLT engine directly. */
4062 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
4063 sna_blt_copy_boxes(sna, alu,
4064 src_bo, src_dx, src_dy,
4065 dst_bo, dst_dx, dst_dy,
4066 dst->drawable.bitsPerPixel,
/* The 3D path only handles GXcopy/GXclear, distinct bos (no overlap
 * support — see XXX), and sources within the 3D size/pitch limits;
 * otherwise fall back through the BLT or a CPU-assisted path. */
4070 if (!(alu == GXcopy || alu == GXclear) ||
4071 src_bo == dst_bo || /* XXX handle overlap using 3D ? */
4072 src_bo->pitch > MAX_3D_PITCH ||
4073 too_large(src->drawable.width, src->drawable.height)) {
4075 if (!kgem_bo_can_blt(&sna->kgem, src_bo) ||
4076 !kgem_bo_can_blt(&sna->kgem, dst_bo))
4079 return sna_blt_copy_boxes_fallback(sna, alu,
4080 src, src_bo, src_dx, src_dy,
4081 dst, dst_bo, dst_dx, dst_dy,
/* Make sure both bos fit in the current batch; flush once and retry. */
4085 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
4086 kgem_submit(&sna->kgem);
4087 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
4091 memset(&tmp, 0, sizeof(tmp));
4092 tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
4094 tmp.dst.pixmap = dst;
4095 tmp.dst.width = dst->drawable.width;
4096 tmp.dst.height = dst->drawable.height;
4097 tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
4098 tmp.dst.bo = dst_bo;
4099 tmp.dst.x = tmp.dst.y = 0;
4102 sna_render_composite_redirect_init(&tmp);
/* Destination exceeds the 3D limits: compute the union of all boxes and
 * redirect rendering into a proxy bo covering just those extents. */
4103 if (too_large(tmp.dst.width, tmp.dst.height) ||
4104 dst_bo->pitch > MAX_3D_PITCH) {
4105 BoxRec extents = box[0];
4108 for (i = 1; i < n; i++) {
4109 if (box[i].x1 < extents.x1)
4110 extents.x1 = box[i].x1;
4111 if (box[i].y1 < extents.y1)
4112 extents.y1 = box[i].y1;
4114 if (box[i].x2 > extents.x2)
4115 extents.x2 = box[i].x2;
4116 if (box[i].y2 > extents.y2)
4117 extents.y2 = box[i].y2;
4119 if (!sna_render_composite_redirect(sna, &tmp,
4120 extents.x1 + dst_dx,
4121 extents.y1 + dst_dy,
4122 extents.x2 - extents.x1,
4123 extents.y2 - extents.y1))
4124 goto fallback_tiled;
4127 gen3_render_copy_setup_source(&tmp.src, src, src_bo);
/* 2 position + 2 texcoord floats per vertex, 3 vertices per rectangle. */
4129 tmp.floats_per_vertex = 4;
4130 tmp.floats_per_rect = 12;
4132 tmp.mask.u.gen3.type = SHADER_NONE;
/* Fold any redirection offset into the destination deltas. */
4134 dst_dx += tmp.dst.x;
4135 dst_dy += tmp.dst.y;
4136 tmp.dst.x = tmp.dst.y = 0;
4138 gen3_emit_composite_state(sna, &tmp);
4139 gen3_align_vertex(sna, &tmp);
/* Emit rectangles in as large chunks as the vertex buffer allows. */
4144 n_this_time = gen3_get_rectangles(sna, &tmp, n);
4148 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
4149 box->x1 + src_dx, box->y1 + src_dy,
4150 box->x1 + dst_dx, box->y1 + dst_dy,
4151 box->x2 - box->x1, box->y2 - box->y1));
/* Three vertices per box — (x2,y2), (x1,y2), (x1,y1) — each with
 * normalised source texcoords (scaled by 1/width, 1/height). */
4152 OUT_VERTEX(box->x2 + dst_dx);
4153 OUT_VERTEX(box->y2 + dst_dy);
4154 OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
4155 OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
4157 OUT_VERTEX(box->x1 + dst_dx);
4158 OUT_VERTEX(box->y2 + dst_dy);
4159 OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
4160 OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
4162 OUT_VERTEX(box->x1 + dst_dx);
4163 OUT_VERTEX(box->y1 + dst_dy);
4164 OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
4165 OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
4168 } while (--n_this_time);
4171 gen3_vertex_flush(sna);
4172 sna_render_composite_redirect_done(sna, &tmp);
/* fallback_tiled: split the copy into tiles that fit the 3D limits. */
4176 return sna_tiling_copy_boxes(sna, alu,
4177 src, src_bo, src_dx, src_dy,
4178 dst, dst_bo, dst_dx, dst_dy,
/* Emit a single w x h copy rectangle for an already-prepared copy op:
 * destination coordinates with normalised source texcoords. */
4183 gen3_render_copy_blt(struct sna *sna,
4184 const struct sna_copy_op *op,
4185 int16_t sx, int16_t sy,
4186 int16_t w, int16_t h,
4187 int16_t dx, int16_t dy)
4189 gen3_get_rectangles(sna, &op->base, 1);
/* Source texcoords are scaled into [0,1] by the per-channel scale factors. */
4193 OUT_VERTEX((sx+w)*op->base.src.scale[0]);
4194 OUT_VERTEX((sy+h)*op->base.src.scale[1]);
4198 OUT_VERTEX(sx*op->base.src.scale[0]);
4199 OUT_VERTEX((sy+h)*op->base.src.scale[1]);
4203 OUT_VERTEX(sx*op->base.src.scale[0]);
4204 OUT_VERTEX(sy*op->base.src.scale[1]);
/* Finish a copy op: flush any vertices still buffered in the batch. */
4208 gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
4210 if (sna->render_state.gen3.vertex_offset)
4211 gen3_vertex_flush(sna);
/* Prepare a reusable copy operation (tmp->blt / tmp->done) from src to
 * dst. Prefers the BLT engine; uses the gen3 3D pipeline only when the
 * surfaces fit its limits and the alu is GXcopy/GXclear. */
4215 gen3_render_copy(struct sna *sna, uint8_t alu,
4216 PixmapPtr src, struct kgem_bo *src_bo,
4217 PixmapPtr dst, struct kgem_bo *dst_bo,
4218 struct sna_copy_op *tmp)
/* Different depths cannot go through the 3D sampler; BLT or nothing. */
4221 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
4224 return sna_blt_copy(sna, alu,
4226 dst->drawable.bitsPerPixel,
4230 /* Prefer to use the BLT */
4231 if (sna->kgem.mode != KGEM_RENDER &&
4232 sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
4233 sna_blt_copy(sna, alu,
4235 dst->drawable.bitsPerPixel,
4239 /* Must use the BLT if we can't RENDER... */
4240 if (!(alu == GXcopy || alu == GXclear) ||
4241 too_large(src->drawable.width, src->drawable.height) ||
4242 too_large(dst->drawable.width, dst->drawable.height) ||
4243 src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch > MAX_3D_PITCH) {
4245 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
4248 return sna_blt_copy(sna, alu, src_bo, dst_bo,
4249 dst->drawable.bitsPerPixel,
/* Set up the composite state: a plain source (or clear) over dst. */
4253 tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
4255 tmp->base.dst.pixmap = dst;
4256 tmp->base.dst.width = dst->drawable.width;
4257 tmp->base.dst.height = dst->drawable.height;
4258 tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
4259 tmp->base.dst.bo = dst_bo;
4261 gen3_render_copy_setup_source(&tmp->base.src, src, src_bo);
/* 2 position + 2 texcoord floats per vertex, 3 vertices per rectangle. */
4263 tmp->base.floats_per_vertex = 4;
4264 tmp->base.floats_per_rect = 12;
4265 tmp->base.mask.bo = NULL;
4266 tmp->base.mask.u.gen3.type = SHADER_NONE;
/* Make room for both bos in the batch; flush once and re-check. */
4268 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
4269 kgem_submit(&sna->kgem);
4270 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
4274 tmp->blt = gen3_render_copy_blt;
4275 tmp->done = gen3_render_copy_done;
4277 gen3_emit_composite_state(sna, &tmp->base);
4278 gen3_align_vertex(sna, &tmp->base);
/* Attempt to service a fill-boxes request with the BLT engine: convert
 * (op, format, color) into a raw pixel value and issue a BLT solid fill.
 * Fails for Y-tiled destinations, which the BLT cannot address here. */
4283 gen3_render_fill_boxes_try_blt(struct sna *sna,
4284 CARD8 op, PictFormat format,
4285 const xRenderColor *color,
4286 PixmapPtr dst, struct kgem_bo *dst_bo,
4287 const BoxRec *box, int n)
4292 if (dst_bo->tiling == I915_TILING_Y) {
4293 DBG(("%s: y-tiling, can't blit\n", __FUNCTION__));
4294 assert(!too_large(dst->drawable.width, dst->drawable.height));
/* Clear is a fill with pixel 0; otherwise convert the render color
 * into the destination format's pixel value. */
4301 if (op == PictOpClear) {
4304 } else if (!sna_get_pixel_from_rgba(&pixel,
4314 return sna_blt_fill_boxes(sna, alu,
4315 dst_bo, dst->drawable.bitsPerPixel,
/* Heuristic: prefer the BLT for fills whenever the batch is not already
 * committed to the RENDER ring, avoiding a ring switch. */
4319 static inline bool prefer_fill_blt(struct sna *sna)
4324 return sna->kgem.mode != KGEM_RENDER;
/* Fill an array of boxes with a solid render color under a Porter-Duff
 * operator. Tries the BLT first; uses a gen3 solid-shader composite when
 * the operator/format require the 3D pipeline; tiles oversized targets. */
4329 gen3_render_fill_boxes(struct sna *sna,
4332 const xRenderColor *color,
4333 PixmapPtr dst, struct kgem_bo *dst_bo,
4334 const BoxRec *box, int n)
4336 struct sna_composite_op tmp;
/* Operators outside the gen3 blend table cannot be rendered at all. */
4339 if (op >= ARRAY_SIZE(gen3_blend_op)) {
4340 DBG(("%s: fallback due to unhandled blend op: %d\n",
4346 return gen3_render_fill_boxes_try_blt(sna, op, format, color,
4351 DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
4352 __FUNCTION__, op, (int)format,
4353 color->red, color->green, color->blue, color->alpha));
/* Destination too big / wrong format for 3D: try the BLT, then tiling. */
4355 if (too_large(dst->drawable.width, dst->drawable.height) ||
4356 dst_bo->pitch > MAX_3D_PITCH ||
4357 !gen3_check_dst_format(format)) {
4358 DBG(("%s: try blt, too large or incompatible destination\n",
4360 if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
4365 if (!gen3_check_dst_format(format))
4368 return sna_tiling_fill_boxes(sna, op, format, color,
4369 dst, dst_bo, box, n);
4372 if (prefer_fill_blt(sna) &&
4373 gen3_render_fill_boxes_try_blt(sna, op, format, color,
/* Resolve the render color to a pixel value (0 for PictOpClear). */
4378 if (op == PictOpClear) {
4381 if (!sna_get_pixel_from_rgba(&pixel,
4391 DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n",
4392 __FUNCTION__, op, (int)format, pixel));
4395 tmp.dst.pixmap = dst;
4396 tmp.dst.width = dst->drawable.width;
4397 tmp.dst.height = dst->drawable.height;
4398 tmp.dst.format = format;
4399 tmp.dst.bo = dst_bo;
/* Solid fill needs only 2 position floats per vertex, 3 per rectangle. */
4400 tmp.floats_per_vertex = 2;
4401 tmp.floats_per_rect = 6;
4402 tmp.rb_reversed = 0;
4403 tmp.has_component_alpha = 0;
4404 tmp.need_magic_ca_pass = false;
4406 gen3_init_solid(&tmp.src, pixel);
4408 tmp.mask.u.gen3.type = SHADER_NONE;
4409 tmp.u.gen3.num_constants = 0;
4411 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
4412 kgem_submit(&sna->kgem);
4413 assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
4416 gen3_emit_composite_state(sna, &tmp);
4417 gen3_align_vertex(sna, &tmp);
/* Emit rectangles in chunks bounded by available vertex space. */
4422 n_this_time = gen3_get_rectangles(sna, &tmp, n);
4426 DBG((" (%d, %d), (%d, %d): %x\n",
4427 box->x1, box->y1, box->x2, box->y2, pixel));
/* Three vertices per box: (x2,y2), (x1,y2), (x1,y1). */
4428 OUT_VERTEX(box->x2);
4429 OUT_VERTEX(box->y2);
4430 OUT_VERTEX(box->x1);
4431 OUT_VERTEX(box->y2);
4432 OUT_VERTEX(box->x1);
4433 OUT_VERTEX(box->y1);
4435 } while (--n_this_time);
4438 gen3_vertex_flush(sna);
/* Emit one w x h solid rectangle at (x, y) for a prepared fill op. */
4443 gen3_render_fill_op_blt(struct sna *sna,
4444 const struct sna_fill_op *op,
4445 int16_t x, int16_t y, int16_t w, int16_t h)
4447 gen3_get_rectangles(sna, &op->base, 1);
/* Emit a single solid rectangle for the given box of a prepared fill op. */
4457 fastcall static void
4458 gen3_render_fill_op_box(struct sna *sna,
4459 const struct sna_fill_op *op,
4462 gen3_get_rectangles(sna, &op->base, 1);
/* Three vertices per box: (x2,y2), (x1,y2), (x1,y1). */
4464 OUT_VERTEX(box->x2);
4465 OUT_VERTEX(box->y2);
4466 OUT_VERTEX(box->x1);
4467 OUT_VERTEX(box->y2);
4468 OUT_VERTEX(box->x1);
4469 OUT_VERTEX(box->y1);
/* Emit solid rectangles for an array of boxes, in chunks bounded by the
 * vertex space returned from gen3_get_rectangles(). */
4472 fastcall static void
4473 gen3_render_fill_op_boxes(struct sna *sna,
4474 const struct sna_fill_op *op,
4478 DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
4479 box->x1, box->y1, box->x2, box->y2, nbox));
4484 nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
4485 nbox -= nbox_this_time;
/* Three vertices per box: (x2,y2), (x1,y2), (x1,y1). */
4488 OUT_VERTEX(box->x2);
4489 OUT_VERTEX(box->y2);
4490 OUT_VERTEX(box->x1);
4491 OUT_VERTEX(box->y2);
4492 OUT_VERTEX(box->x1);
4493 OUT_VERTEX(box->y1);
4495 } while (--nbox_this_time);
/* Finish a fill op: flush any vertices still buffered in the batch. */
4500 gen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
4502 if (sna->render_state.gen3.vertex_offset)
4503 gen3_vertex_flush(sna);
/* Prepare a reusable solid-fill operation (blt/box/boxes/done callbacks)
 * for the given pixel color. Prefers the BLT; falls back to it whenever
 * the destination or alu exceeds what gen3 RENDER supports. */
4507 gen3_render_fill(struct sna *sna, uint8_t alu,
4508 PixmapPtr dst, struct kgem_bo *dst_bo,
4510 struct sna_fill_op *tmp)
4513 return sna_blt_fill(sna, alu,
4514 dst_bo, dst->drawable.bitsPerPixel,
4519 /* Prefer to use the BLT if already engaged */
4520 if (prefer_fill_blt(sna) &&
4521 sna_blt_fill(sna, alu,
4522 dst_bo, dst->drawable.bitsPerPixel,
4527 /* Must use the BLT if we can't RENDER... */
4528 if (!(alu == GXcopy || alu == GXclear) ||
4529 too_large(dst->drawable.width, dst->drawable.height) ||
4530 dst_bo->pitch > MAX_3D_PITCH)
4531 return sna_blt_fill(sna, alu,
4532 dst_bo, dst->drawable.bitsPerPixel,
/* Black (0) degenerates to a clear; anything else is a plain source fill. */
4539 tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
4540 tmp->base.dst.pixmap = dst;
4541 tmp->base.dst.width = dst->drawable.width;
4542 tmp->base.dst.height = dst->drawable.height;
4543 tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
4544 tmp->base.dst.bo = dst_bo;
/* Solid fill needs only 2 position floats per vertex, 3 per rectangle. */
4545 tmp->base.floats_per_vertex = 2;
4546 tmp->base.floats_per_rect = 6;
4547 tmp->base.need_magic_ca_pass = 0;
4548 tmp->base.has_component_alpha = 0;
4549 tmp->base.rb_reversed = 0;
/* Expand the pixel into RGBA for the destination depth before the shader. */
4551 gen3_init_solid(&tmp->base.src,
4552 sna_rgba_for_color(color, dst->drawable.depth));
4553 tmp->base.mask.bo = NULL;
4554 tmp->base.mask.u.gen3.type = SHADER_NONE;
4555 tmp->base.u.gen3.num_constants = 0;
4557 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
4558 kgem_submit(&sna->kgem);
4559 assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
4562 tmp->blt = gen3_render_fill_op_blt;
4563 tmp->box = gen3_render_fill_op_box;
4564 tmp->boxes = gen3_render_fill_op_boxes;
4565 tmp->done = gen3_render_fill_op_done;
4567 gen3_emit_composite_state(sna, &tmp->base);
4568 gen3_align_vertex(sna, &tmp->base);
/* Attempt a single-rectangle solid fill via the BLT engine. */
4573 gen3_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
4575 int16_t x1, int16_t y1, int16_t x2, int16_t y2,
4585 return sna_blt_fill_boxes(sna, alu,
4586 bo, dst->drawable.bitsPerPixel,
/* Fill a single rectangle (x1,y1)-(x2,y2) with a solid color. Tries the
 * BLT first and only programs the gen3 3D pipeline when the rectangle's
 * destination fits within the RENDER limits. */
4591 gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
4593 int16_t x1, int16_t y1,
4594 int16_t x2, int16_t y2,
4597 struct sna_composite_op tmp;
4600 return gen3_render_fill_one_try_blt(sna, dst, bo, color,
4601 x1, y1, x2, y2, alu);
4604 /* Prefer to use the BLT if already engaged */
4605 if (prefer_fill_blt(sna) &&
4606 gen3_render_fill_one_try_blt(sna, dst, bo, color,
4607 x1, y1, x2, y2, alu))
4610 /* Must use the BLT if we can't RENDER... */
4611 if (!(alu == GXcopy || alu == GXclear) ||
4612 too_large(dst->drawable.width, dst->drawable.height) ||
4613 bo->pitch > MAX_3D_PITCH)
4614 return gen3_render_fill_one_try_blt(sna, dst, bo, color,
4615 x1, y1, x2, y2, alu);
/* Black (0) degenerates to a clear; anything else is a plain source fill. */
4620 tmp.op = color == 0 ? PictOpClear : PictOpSrc;
4621 tmp.dst.pixmap = dst;
4622 tmp.dst.width = dst->drawable.width;
4623 tmp.dst.height = dst->drawable.height;
4624 tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
/* Solid fill needs only 2 position floats per vertex, 3 per rectangle. */
4626 tmp.floats_per_vertex = 2;
4627 tmp.floats_per_rect = 6;
4628 tmp.need_magic_ca_pass = 0;
4629 tmp.has_component_alpha = 0;
4630 tmp.rb_reversed = 0;
4632 gen3_init_solid(&tmp.src,
4633 sna_rgba_for_color(color, dst->drawable.depth));
4635 tmp.mask.u.gen3.type = SHADER_NONE;
4636 tmp.u.gen3.num_constants = 0;
/* Batch full: flush it, then retry via the BLT rather than re-emitting
 * the full 3D state into a fresh batch. */
4638 if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
4639 kgem_submit(&sna->kgem);
4640 if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
4641 x1, y1, x2, y2, alu))
4645 gen3_emit_composite_state(sna, &tmp);
4646 gen3_align_vertex(sna, &tmp);
4647 gen3_get_rectangles(sna, &tmp, 1);
4648 DBG((" (%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
4655 gen3_vertex_flush(sna);
/* Flush hook: close out the vertex buffer before the batch is submitted. */
4660 static void gen3_render_flush(struct sna *sna)
4662 gen3_vertex_close(sna);
/* Teardown hook for the gen3 render backend (body elided in this view). */
4666 gen3_render_fini(struct sna *sna)
4670 bool gen3_render_init(struct sna *sna)
4672 struct sna_render *render = &sna->render;
4675 render->composite = gen3_render_composite;
4677 #if !NO_COMPOSITE_SPANS
4678 render->check_composite_spans = gen3_check_composite_spans;
4679 render->composite_spans = gen3_render_composite_spans;
4682 render->video = gen3_render_video;
4684 render->copy_boxes = gen3_render_copy_boxes;
4685 render->copy = gen3_render_copy;
4687 render->fill_boxes = gen3_render_fill_boxes;
4688 render->fill = gen3_render_fill;
4689 render->fill_one = gen3_render_fill_one;
4691 render->reset = gen3_render_reset;
4692 render->flush = gen3_render_flush;
4693 render->fini = gen3_render_fini;
4695 render->max_3d_size = MAX_3D_SIZE;
4696 render->max_3d_pitch = MAX_3D_PITCH;
4698 sna->kgem.retire = gen3_render_retire;
4699 sna->kgem.expire = gen3_render_expire;