r300: reset bos when validating buffers during blit
[profile/ivi/mesa.git] / src / mesa / drivers / dri / r300 / r300_blit.c
1 /*
2  * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27
28 #include "radeon_common.h"
29 #include "r300_context.h"
30
31 #include "r300_blit.h"
32 #include "r300_cmdbuf.h"
33 #include "r300_emit.h"
34 #include "r300_tex.h"
35 #include "compiler/radeon_compiler.h"
36 #include "compiler/radeon_opcodes.h"
37
38 static void vp_ins_outs(struct r300_vertex_program_compiler *c)
39 {
40     c->code->inputs[VERT_ATTRIB_POS] = 0;
41     c->code->inputs[VERT_ATTRIB_TEX0] = 1;
42     c->code->outputs[VERT_RESULT_HPOS] = 0;
43     c->code->outputs[VERT_RESULT_TEX0] = 1;
44 }
45
46 static void fp_allocate_hw_inputs(
47     struct r300_fragment_program_compiler * c,
48     void (*allocate)(void * data, unsigned input, unsigned hwreg),
49     void * mydata)
50 {
51     allocate(mydata, FRAG_ATTRIB_TEX0, 0);
52 }
53
54 static void create_vertex_program(struct r300_context *r300)
55 {
56     struct r300_vertex_program_compiler compiler;
57     struct rc_instruction *inst;
58
59     rc_init(&compiler.Base);
60
61     inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev);
62     inst->U.I.Opcode = RC_OPCODE_MOV;
63     inst->U.I.DstReg.File = RC_FILE_OUTPUT;
64     inst->U.I.DstReg.Index = VERT_RESULT_HPOS;
65     inst->U.I.DstReg.RelAddr = 0;
66     inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
67     inst->U.I.SrcReg[0].Abs = 0;
68     inst->U.I.SrcReg[0].File = RC_FILE_INPUT;
69     inst->U.I.SrcReg[0].Index = VERT_ATTRIB_POS;
70     inst->U.I.SrcReg[0].Negate = 0;
71     inst->U.I.SrcReg[0].RelAddr = 0;
72     inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
73
74     inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev);
75     inst->U.I.Opcode = RC_OPCODE_MOV;
76     inst->U.I.DstReg.File = RC_FILE_OUTPUT;
77     inst->U.I.DstReg.Index = VERT_RESULT_TEX0;
78     inst->U.I.DstReg.RelAddr = 0;
79     inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
80     inst->U.I.SrcReg[0].Abs = 0;
81     inst->U.I.SrcReg[0].File = RC_FILE_INPUT;
82     inst->U.I.SrcReg[0].Index = VERT_ATTRIB_TEX0;
83     inst->U.I.SrcReg[0].Negate = 0;
84     inst->U.I.SrcReg[0].RelAddr = 0;
85     inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
86
87     compiler.Base.Program.InputsRead = (1 << VERT_ATTRIB_POS) | (1 << VERT_ATTRIB_TEX0);
88     compiler.RequiredOutputs = compiler.Base.Program.OutputsWritten = (1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_TEX0);
89     compiler.SetHwInputOutput = vp_ins_outs;
90     compiler.code = &r300->blit.vp_code;
91
92     r3xx_compile_vertex_program(&compiler);
93 }
94
95 static void create_fragment_program(struct r300_context *r300)
96 {
97     struct r300_fragment_program_compiler compiler;
98     struct rc_instruction *inst;
99
100     memset(&compiler, 0, sizeof(struct r300_fragment_program_compiler));
101     rc_init(&compiler.Base);
102
103     inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev);
104     inst->U.I.Opcode = RC_OPCODE_TEX;
105     inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
106     inst->U.I.TexSrcUnit = 0;
107     inst->U.I.DstReg.File = RC_FILE_OUTPUT;
108     inst->U.I.DstReg.Index = FRAG_RESULT_COLOR;
109     inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
110     inst->U.I.SrcReg[0].Abs = 0;
111     inst->U.I.SrcReg[0].File = RC_FILE_INPUT;
112     inst->U.I.SrcReg[0].Index = FRAG_ATTRIB_TEX0;
113     inst->U.I.SrcReg[0].Negate = 0;
114     inst->U.I.SrcReg[0].RelAddr = 0;
115     inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
116
117     compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0);
118     compiler.OutputColor[0] = FRAG_RESULT_COLOR;
119     compiler.OutputDepth = FRAG_RESULT_DEPTH;
120     compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515);
121     compiler.code = &r300->blit.fp_code;
122     compiler.AllocateHwInputs = fp_allocate_hw_inputs;
123
124     r3xx_compile_fragment_program(&compiler);
125 }
126
127 void r300_blit_init(struct r300_context *r300)
128 {
129     if (r300->options.hw_tcl_enabled)
130         create_vertex_program(r300);
131     create_fragment_program(r300);
132 }
133
134 static void r300_emit_tx_setup(struct r300_context *r300,
135                                gl_format mesa_format,
136                                struct radeon_bo *bo,
137                                intptr_t offset,
138                                unsigned width,
139                                unsigned height,
140                                unsigned pitch)
141 {
142     BATCH_LOCALS(&r300->radeon);
143
144     assert(width <= 2048);
145     assert(height <= 2048);
146     assert(r300TranslateTexFormat(mesa_format) >= 0);
147     assert(offset % 32 == 0);
148
149     BEGIN_BATCH(17);
150     OUT_BATCH_REGVAL(R300_TX_FILTER0_0,
151                      (R300_TX_CLAMP_TO_EDGE  << R300_TX_WRAP_S_SHIFT) |
152                      (R300_TX_CLAMP_TO_EDGE  << R300_TX_WRAP_T_SHIFT) |
153                      (R300_TX_CLAMP_TO_EDGE  << R300_TX_WRAP_R_SHIFT) |
154                      R300_TX_MIN_FILTER_MIP_NONE |
155                      R300_TX_MIN_FILTER_NEAREST |
156                      R300_TX_MAG_FILTER_NEAREST |
157                      (0 << 28));
158     OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0);
159     OUT_BATCH_REGVAL(R300_TX_SIZE_0,
160                      ((width-1) << R300_TX_WIDTHMASK_SHIFT) |
161                      ((height-1) << R300_TX_HEIGHTMASK_SHIFT) |
162                      (0 << R300_TX_DEPTHMASK_SHIFT) |
163                      (0 << R300_TX_MAX_MIP_LEVEL_SHIFT) |
164                      R300_TX_SIZE_TXPITCH_EN);
165
166     OUT_BATCH_REGVAL(R300_TX_FORMAT_0, r300TranslateTexFormat(mesa_format));
167     OUT_BATCH_REGVAL(R300_TX_FORMAT2_0, pitch - 1);
168     OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, 1);
169     OUT_BATCH_RELOC(0, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
170
171     OUT_BATCH_REGSEQ(R300_TX_INVALTAGS, 2);
172     OUT_BATCH(0);
173     OUT_BATCH(1);
174
175     END_BATCH();
176 }
177
178 #define EASY_US_FORMAT(FMT, C0, C1, C2, C3, SIGN) \
179     (FMT  | R500_C0_SEL_##C0 | R500_C1_SEL_##C1 | \
180     R500_C2_SEL_##C2 | R500_C3_SEL_##C3 | R500_OUT_SIGN(SIGN))
181
182 static uint32_t mesa_format_to_us_format(gl_format mesa_format)
183 {
184     switch(mesa_format)
185     {
186         case MESA_FORMAT_RGBA8888: // x
187             return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0);
188         case MESA_FORMAT_RGB565: // x
189         case MESA_FORMAT_ARGB1555: // x
190         case MESA_FORMAT_RGBA8888_REV: // x
191             return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0);
192         case MESA_FORMAT_ARGB8888: // x
193             return EASY_US_FORMAT(R500_OUT_FMT_C4_8, B, G, R, A, 0);
194         case MESA_FORMAT_ARGB8888_REV:
195             return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0);
196         case MESA_FORMAT_XRGB8888:
197             return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0);
198
199         case MESA_FORMAT_RGB332:
200             return EASY_US_FORMAT(R500_OUT_FMT_C_3_3_2, A, R, G, B, 0);
201
202         case MESA_FORMAT_RGBA_FLOAT32:
203             return EASY_US_FORMAT(R500_OUT_FMT_C4_32_FP, R, G, B, A, 0);
204         case MESA_FORMAT_RGBA_FLOAT16:
205             return EASY_US_FORMAT(R500_OUT_FMT_C4_16_FP, R, G, B, A, 0);
206         case MESA_FORMAT_ALPHA_FLOAT32:
207             return EASY_US_FORMAT(R500_OUT_FMT_C_32_FP, A, A, A, A, 0);
208         case MESA_FORMAT_ALPHA_FLOAT16:
209             return EASY_US_FORMAT(R500_OUT_FMT_C_16_FP, A, A, A, A, 0);
210
211         case MESA_FORMAT_SIGNED_RGBA8888:
212             return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0xf);
213         case MESA_FORMAT_SIGNED_RGBA8888_REV:
214             return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0xf);
215         case MESA_FORMAT_SIGNED_RGBA_16:
216             return EASY_US_FORMAT(R500_OUT_FMT_C4_16, R, G, B, A, 0xf);
217
218         default:
219             fprintf(stderr, "Unsupported format %s for US output\n", _mesa_get_format_name(mesa_format));
220             assert(0);
221             return 0;
222     }
223 }
224 #undef EASY_US_FORMAT
225
226 static void r500_emit_fp_setup(struct r300_context *r300,
227                                struct r500_fragment_program_code *fp,
228                                gl_format dst_format)
229 {
230     r500_emit_fp(r300, (uint32_t *)fp->inst, (fp->inst_end + 1) * 6, 0, 0, 0);
231     BATCH_LOCALS(&r300->radeon);
232
233     BEGIN_BATCH(10);
234     OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
235     OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(fp->inst_end));
236     OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(fp->inst_end));
237     OUT_BATCH(0);
238     OUT_BATCH_REGVAL(R500_US_CONFIG, 0);
239     OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format));
240     OUT_BATCH_REGVAL(R500_US_PIXSIZE, fp->max_temp_idx);
241     END_BATCH();
242 }
243
244 static void r500_emit_rs_setup(struct r300_context *r300)
245 {
246     BATCH_LOCALS(&r300->radeon);
247
248     BEGIN_BATCH(7);
249     OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
250     OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN);
251     OUT_BATCH(0);
252     OUT_BATCH_REGVAL(R500_RS_INST_0,
253                      (0 << R500_RS_INST_TEX_ID_SHIFT) |
254                      (0 << R500_RS_INST_TEX_ADDR_SHIFT) |
255                      R500_RS_INST_TEX_CN_WRITE |
256                      R500_RS_INST_COL_CN_NO_WRITE);
257     OUT_BATCH_REGVAL(R500_RS_IP_0,
258                      (0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
259                      (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
260                      (2 << R500_RS_IP_TEX_PTR_R_SHIFT) |
261                      (3 << R500_RS_IP_TEX_PTR_Q_SHIFT));
262     END_BATCH();
263 }
264
265 static void r300_emit_fp_setup(struct r300_context *r300,
266                                struct r300_fragment_program_code *code,
267                                gl_format dst_format)
268 {
269     unsigned i;
270     BATCH_LOCALS(&r300->radeon);
271
272     BEGIN_BATCH((code->alu.length + 1) * 4 + code->tex.length + 1 + 11);
273
274     OUT_BATCH_REGSEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
275     for (i = 0; i < code->alu.length; i++) {
276         OUT_BATCH(code->alu.inst[i].rgb_inst);
277     }
278     OUT_BATCH_REGSEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
279     for (i = 0; i < code->alu.length; i++) {
280         OUT_BATCH(code->alu.inst[i].rgb_addr);
281     }
282     OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
283     for (i = 0; i < code->alu.length; i++) {
284         OUT_BATCH(code->alu.inst[i].alpha_inst);
285     }
286     OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
287     for (i = 0; i < code->alu.length; i++) {
288         OUT_BATCH(code->alu.inst[i].alpha_addr);
289     }
290
291     OUT_BATCH_REGSEQ(R300_US_TEX_INST_0, code->tex.length);
292     OUT_BATCH_TABLE(code->tex.inst, code->tex.length);
293
294     OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
295     OUT_BATCH(R300_PFS_CNTL_FIRST_NODE_HAS_TEX);
296     OUT_BATCH(code->pixsize);
297     OUT_BATCH(code->code_offset);
298     OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
299     OUT_BATCH_TABLE(code->code_addr, 4);
300     OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format));
301     END_BATCH();
302 }
303
304 static void r300_emit_rs_setup(struct r300_context *r300)
305 {
306     BATCH_LOCALS(&r300->radeon);
307
308     BEGIN_BATCH(7);
309     OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
310     OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN);
311     OUT_BATCH(0);
312     OUT_BATCH_REGVAL(R300_RS_INST_0,
313                      R300_RS_INST_TEX_ID(0) |
314                      R300_RS_INST_TEX_ADDR(0) |
315                      R300_RS_INST_TEX_CN_WRITE);
316     OUT_BATCH_REGVAL(R300_RS_IP_0,
317                      R300_RS_TEX_PTR(0) |
318                      R300_RS_SEL_S(R300_RS_SEL_C0) |
319                      R300_RS_SEL_T(R300_RS_SEL_C1) |
320                      R300_RS_SEL_R(R300_RS_SEL_K0) |
321                      R300_RS_SEL_Q(R300_RS_SEL_K1));
322     END_BATCH();
323 }
324
325 static void emit_pvs_setup(struct r300_context *r300,
326                            uint32_t *vp_code,
327                            unsigned vp_len)
328 {
329     BATCH_LOCALS(&r300->radeon);
330
331     r300_emit_vpu(r300, vp_code, vp_len * 4, R300_PVS_CODE_START);
332
333     BEGIN_BATCH(4);
334     OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
335     OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
336               ((vp_len - 1)  << R300_PVS_XYZW_VALID_INST_SHIFT) |
337               ((vp_len - 1)<< R300_PVS_LAST_INST_SHIFT));
338     OUT_BATCH(0);
339     OUT_BATCH((vp_len - 1) << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
340     END_BATCH();
341 }
342
343 static void emit_vap_setup(struct r300_context *r300)
344 {
345     int tex_offset;
346     BATCH_LOCALS(&r300->radeon);
347
348     if (r300->options.hw_tcl_enabled)
349         tex_offset = 1;
350     else
351         tex_offset = 6;
352
353     BEGIN_BATCH(12);
354     OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
355     OUT_BATCH(R300_VTX_XY_FMT | R300_VTX_Z_FMT);
356     OUT_BATCH(4);
357
358     OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
359     OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_0,
360                      ((R300_DATA_TYPE_FLOAT_2 | (0 << R300_DST_VEC_LOC_SHIFT)) << 0) |
361                      (((tex_offset << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_2 | R300_LAST_VEC) << 16));
362     OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
363                     ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
364                        (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
365                        (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) |
366                        (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | 
367                        (0xf << R300_WRITE_ENA_SHIFT) ) << 0) |
368                      (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
369                        (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
370                        (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) |
371                        (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) |
372                        (0xf << R300_WRITE_ENA_SHIFT) ) << 16) ) );
373     OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
374     OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT);
375     OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS);
376     END_BATCH();
377 }
378
379 static GLboolean validate_buffers(struct r300_context *r300,
380                                   struct radeon_bo *src_bo,
381                                   struct radeon_bo *dst_bo)
382 {
383     int ret;
384
385     radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs);
386
387     ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
388                                         src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
389     if (ret)
390         return GL_FALSE;
391
392     ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
393                                         dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
394     if (ret)
395         return GL_FALSE;
396
397     ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
398                                         first_elem(&r300->radeon.dma.reserved)->bo,
399                                         RADEON_GEM_DOMAIN_GTT, 0);
400     if (ret)
401         return GL_FALSE;
402
403     return GL_TRUE;
404 }
405
/**
 * Compute normalized texture coordinates for a rectangular image region.
 *
 * @param img_width   width of the whole image
 * @param img_height  height of the whole image
 * @param x           left edge of the region
 * @param y           top edge of the region
 * @param reg_width   region width
 * @param reg_height  region height
 * @param flip_y      when non-zero both y coordinates are mirrored (1 - v)
 * @param[out] buf    receives [minx, maxx, miny, maxy]; after a flip the
 *                    "miny" entry is the larger of the two y values
 */
static void calc_tex_coords(float img_width, float img_height,
                            float x, float y,
                            float reg_width, float reg_height,
                            unsigned flip_y, float *buf)
{
    const float s_min = x / img_width;
    const float s_max = s_min + reg_width / img_width;
    float t_min = y / img_height;
    float t_max = t_min + reg_height / img_height;

    if (flip_y) {
        t_min = 1.0 - t_min;
        t_max = 1.0 - t_max;
    }

    buf[0] = s_min;
    buf[1] = s_max;
    buf[2] = t_min;
    buf[3] = t_max;
}
425
426 static void emit_draw_packet(struct r300_context *r300,
427                              unsigned src_width, unsigned src_height,
428                              unsigned src_x_offset, unsigned src_y_offset,
429                              unsigned dst_x_offset, unsigned dst_y_offset,
430                              unsigned reg_width, unsigned reg_height,
431                              unsigned flip_y)
432 {
433     float texcoords[4];
434
435     calc_tex_coords(src_width, src_height,
436                     src_x_offset, src_y_offset,
437                     reg_width, reg_height,
438                     flip_y, texcoords);
439
440     float verts[] = { dst_x_offset, dst_y_offset,
441                       texcoords[0], texcoords[2],
442                       dst_x_offset, dst_y_offset + reg_height,
443                       texcoords[0], texcoords[3],
444                       dst_x_offset + reg_width, dst_y_offset + reg_height,
445                       texcoords[1], texcoords[3],
446                       dst_x_offset + reg_width, dst_y_offset,
447                       texcoords[1], texcoords[2] };
448
449     BATCH_LOCALS(&r300->radeon);
450
451     BEGIN_BATCH(19);
452     OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_IMMD_2, 16);
453     OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED |
454               (4 << 16) | R300_VAP_VF_CNTL__PRIM_QUADS);
455     OUT_BATCH_TABLE(verts, 16);
456     END_BATCH();
457 }
458
459 static void other_stuff(struct r300_context *r300)
460 {
461     BATCH_LOCALS(&r300->radeon);
462
463     BEGIN_BATCH(13);
464     OUT_BATCH_REGVAL(R300_GA_POLY_MODE,
465                      R300_GA_POLY_MODE_FRONT_PTYPE_TRI | R300_GA_POLY_MODE_BACK_PTYPE_TRI);
466     OUT_BATCH_REGVAL(R300_SU_CULL_MODE, R300_FRONT_FACE_CCW);
467     OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
468     OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
469     OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
470     OUT_BATCH(0x0);
471     OUT_BATCH(0x0);
472     OUT_BATCH_REGVAL(R300_ZB_CNTL, 0);
473     END_BATCH();
474     if (r300->options.hw_tcl_enabled) {
475         BEGIN_BATCH(2);
476         OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
477         END_BATCH();
478     }
479 }
480
481 static void emit_cb_setup(struct r300_context *r300,
482                           struct radeon_bo *bo,
483                           intptr_t offset,
484                           gl_format mesa_format,
485                           unsigned pitch,
486                           unsigned width,
487                           unsigned height)
488 {
489     BATCH_LOCALS(&r300->radeon);
490
491     unsigned x1, y1, x2, y2;
492     x1 = 0;
493     y1 = 0;
494     x2 = width - 1;
495     y2 = height - 1;
496
497     if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
498         x1 += R300_SCISSORS_OFFSET;
499         y1 += R300_SCISSORS_OFFSET;
500         x2 += R300_SCISSORS_OFFSET;
501         y2 += R300_SCISSORS_OFFSET;
502     }
503
504     r300_emit_cb_setup(r300, bo, offset, mesa_format,
505                        _mesa_get_format_bytes(mesa_format),
506                        _mesa_format_row_stride(mesa_format, pitch));
507
508     BEGIN_BATCH_NO_AUTOSTATE(5);
509     OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
510     OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT));
511     OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT));
512     OUT_BATCH_REGVAL(R300_RB3D_CCTL, 0);
513     END_BATCH();
514 }
515
516 unsigned r300_check_blit(gl_format dst_format)
517 {
518     switch (dst_format) {
519         case MESA_FORMAT_RGB565:
520         case MESA_FORMAT_ARGB1555:
521         case MESA_FORMAT_RGBA8888:
522         case MESA_FORMAT_RGBA8888_REV:
523         case MESA_FORMAT_ARGB8888:
524         case MESA_FORMAT_ARGB8888_REV:
525         case MESA_FORMAT_XRGB8888:
526             break;
527         default:
528             return 0;
529     }
530
531     if (_mesa_get_format_bits(dst_format, GL_DEPTH_BITS) > 0)
532         return 0;
533
534     return 1;
535 }
536
537 /**
538  * Copy a region of [@a width x @a height] pixels from source buffer
539  * to destination buffer.
540  * @param[in] r300 r300 context
541  * @param[in] src_bo source radeon buffer object
542  * @param[in] src_offset offset of the source image in the @a src_bo
543  * @param[in] src_mesaformat source image format
544  * @param[in] src_pitch aligned source image width
545  * @param[in] src_width source image width
546  * @param[in] src_height source image height
547  * @param[in] src_x_offset x offset in the source image
548  * @param[in] src_y_offset y offset in the source image
549  * @param[in] dst_bo destination radeon buffer object
550  * @param[in] dst_offset offset of the destination image in the @a dst_bo
551  * @param[in] dst_mesaformat destination image format
552  * @param[in] dst_pitch aligned destination image width
553  * @param[in] dst_width destination image width
554  * @param[in] dst_height destination image height
555  * @param[in] dst_x_offset x offset in the destination image
556  * @param[in] dst_y_offset y offset in the destination image
557  * @param[in] width region width
558  * @param[in] height region height
559  * @param[in] flip_y set if y coords of the source image need to be flipped
560  */
561 unsigned r300_blit(GLcontext *ctx,
562                    struct radeon_bo *src_bo,
563                    intptr_t src_offset,
564                    gl_format src_mesaformat,
565                    unsigned src_pitch,
566                    unsigned src_width,
567                    unsigned src_height,
568                    unsigned src_x_offset,
569                    unsigned src_y_offset,
570                    struct radeon_bo *dst_bo,
571                    intptr_t dst_offset,
572                    gl_format dst_mesaformat,
573                    unsigned dst_pitch,
574                    unsigned dst_width,
575                    unsigned dst_height,
576                    unsigned dst_x_offset,
577                    unsigned dst_y_offset,
578                    unsigned reg_width,
579                    unsigned reg_height,
580                    unsigned flip_y)
581 {
582     r300ContextPtr r300 = R300_CONTEXT(ctx);
583
584     if (!r300_check_blit(dst_mesaformat))
585         return 0;
586
587     /* Make sure that colorbuffer has even width - hw limitation */
588     if (dst_pitch % 2 > 0)
589         ++dst_pitch;
590
591     /* Rendering to small buffer doesn't work.
592      * Looks like a hw limitation.
593      */
594     if (dst_pitch < 32)
595         return 0;
596
597     /* Need to clamp the region size to make sure
598      * we don't read outside of the source buffer
599      * or write outside of the destination buffer.
600      */
601     if (reg_width + src_x_offset > src_width)
602         reg_width = src_width - src_x_offset;
603     if (reg_height + src_y_offset > src_height)
604         reg_height = src_height - src_y_offset;
605     if (reg_width + dst_x_offset > dst_width)
606         reg_width = dst_width - dst_x_offset;
607     if (reg_height + dst_y_offset > dst_height)
608         reg_height = dst_height - dst_y_offset;
609
610     if (src_bo == dst_bo) {
611         return 0;
612     }
613
614     if (src_offset % 32 || dst_offset % 32) {
615         return GL_FALSE;
616     }
617
618     if (0) {
619         fprintf(stderr, "src: size [%d x %d], pitch %d, "
620                 "offset [%d x %d], format %s, bo %p\n",
621                 src_width, src_height, src_pitch,
622                 src_x_offset, src_y_offset,
623                 _mesa_get_format_name(src_mesaformat),
624                 src_bo);
625         fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
626                 dst_pitch, dst_x_offset, dst_y_offset,
627                 _mesa_get_format_name(dst_mesaformat), dst_bo);
628         fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
629     }
630
631     /* Flush is needed to make sure that source buffer has correct data */
632     radeonFlush(r300->radeon.glCtx);
633
634     if (!validate_buffers(r300, src_bo, dst_bo))
635         return 0;
636
637     rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__);
638
639     other_stuff(r300);
640
641     r300_emit_tx_setup(r300, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
642
643     if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
644         r500_emit_fp_setup(r300, &r300->blit.fp_code.code.r500, dst_mesaformat);
645         r500_emit_rs_setup(r300);
646     } else {
647         r300_emit_fp_setup(r300, &r300->blit.fp_code.code.r300, dst_mesaformat);
648         r300_emit_rs_setup(r300);
649     }
650
651     if (r300->options.hw_tcl_enabled)
652         emit_pvs_setup(r300, r300->blit.vp_code.body.d, 2);
653
654     emit_vap_setup(r300);
655
656     emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
657
658     emit_draw_packet(r300, src_width, src_height,
659                      src_x_offset, src_y_offset,
660                      dst_x_offset, dst_y_offset,
661                      reg_width, reg_height,
662                      flip_y);
663
664     r300EmitCacheFlush(r300);
665
666     radeonFlush(r300->radeon.glCtx);
667
668     return 1;
669 }