2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
34 * \author Ben Skeggs <darktama@iinet.net.au>
36 * \author Jerome Glisse <j.glisse@gmail.com>
41 #include "r300_fragprog.h"
43 #include "../r300_reg.h"
45 #include "radeon_program_pair.h"
46 #include "r300_fragprog_swizzle.h"
/**
 * Bookkeeping carried through the emit callbacks while the hardware code
 * of one fragment program is being generated.
 */
struct r300_emit_state {
	/** Compiler whose r300 code structure is being filled in. */
	struct r300_fragment_program_compiler *compiler;

	/** Index of the node currently being emitted (2 bits: 0..3). */
	unsigned current_node : 2;
	/** Value of code->tex.length at the time the current node was started. */
	unsigned node_first_tex : 8;
	/** Value of code->alu.length at the time the current node was started. */
	unsigned node_first_alu : 8;
	/**
	 * Flags (R300_RGBA_OUT, R300_W_OUT) accumulated by emit_alu() for the
	 * current node; cleared whenever a new node is started.
	 * NOTE(review): member type assumed to be a 32-bit unsigned - confirm.
	 */
	uint32_t node_flags;
};
/**
 * Declare the three locals (emit, c, code) used by callbacks that receive
 * the emit state through an opaque `void *data` argument.
 *
 * NOTE(review): the macro's name line is not visible in this excerpt;
 * PROG_CODE is assumed - confirm against the callers.
 */
#define PROG_CODE \
	struct r300_emit_state *emit = (struct r300_emit_state *)data; \
	struct r300_fragment_program_compiler *c = emit->compiler; \
	struct r300_fragment_program_code *code = &c->code->code.r300
/**
 * Report a compile error through rc_error(), prefixing the message with the
 * source file and the name of the calling function.
 *
 * Expects a variable `c` with a `Base` member to be in scope at the point of
 * use.  `fmt` must be a string literal because it is concatenated with the
 * prefix and the trailing newline.
 */
#define error(fmt, ...) do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __func__, ##__VA_ARGS__); \
	} while (0)
70 * Mark a temporary register as used.
72 static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
74 if (index > code->pixsize)
75 code->pixsize = index;
79 static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
82 case OPCODE_CMP: return R300_ALU_OUTC_CMP;
83 case OPCODE_DP3: return R300_ALU_OUTC_DP3;
84 case OPCODE_DP4: return R300_ALU_OUTC_DP4;
85 case OPCODE_FRC: return R300_ALU_OUTC_FRC;
87 error("translate_rgb_opcode(%i): Unknown opcode", opcode);
91 case OPCODE_MAD: return R300_ALU_OUTC_MAD;
92 case OPCODE_MAX: return R300_ALU_OUTC_MAX;
93 case OPCODE_MIN: return R300_ALU_OUTC_MIN;
94 case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
98 static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
101 case OPCODE_CMP: return R300_ALU_OUTA_CMP;
102 case OPCODE_DP3: return R300_ALU_OUTA_DP4;
103 case OPCODE_DP4: return R300_ALU_OUTA_DP4;
104 case OPCODE_EX2: return R300_ALU_OUTA_EX2;
105 case OPCODE_FRC: return R300_ALU_OUTA_FRC;
106 case OPCODE_LG2: return R300_ALU_OUTA_LG2;
108 error("translate_rgb_opcode(%i): Unknown opcode", opcode);
112 case OPCODE_MAD: return R300_ALU_OUTA_MAD;
113 case OPCODE_MAX: return R300_ALU_OUTA_MAX;
114 case OPCODE_MIN: return R300_ALU_OUTA_MIN;
115 case OPCODE_RCP: return R300_ALU_OUTA_RCP;
116 case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
121 * Emit one paired ALU instruction.
123 static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
127 if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
128 error("Too many ALU instructions");
132 int ip = code->alu.length++;
135 code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
136 code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
138 for(j = 0; j < 3; ++j) {
139 GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
140 if (!inst->RGB.Src[j].Constant)
141 use_temporary(code, inst->RGB.Src[j].Index);
142 code->alu.inst[ip].rgb_addr |= src << (6*j);
144 src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
145 if (!inst->Alpha.Src[j].Constant)
146 use_temporary(code, inst->Alpha.Src[j].Index);
147 code->alu.inst[ip].alpha_addr |= src << (6*j);
149 GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
150 arg |= inst->RGB.Arg[j].Abs << 6;
151 arg |= inst->RGB.Arg[j].Negate << 5;
152 code->alu.inst[ip].rgb_inst |= arg << (7*j);
154 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
155 arg |= inst->Alpha.Arg[j].Abs << 6;
156 arg |= inst->Alpha.Arg[j].Negate << 5;
157 code->alu.inst[ip].alpha_inst |= arg << (7*j);
160 if (inst->RGB.Saturate)
161 code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
162 if (inst->Alpha.Saturate)
163 code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
165 if (inst->RGB.WriteMask) {
166 use_temporary(code, inst->RGB.DestIndex);
167 code->alu.inst[ip].rgb_addr |=
168 (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
169 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
171 if (inst->RGB.OutputWriteMask) {
172 code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
173 emit->node_flags |= R300_RGBA_OUT;
176 if (inst->Alpha.WriteMask) {
177 use_temporary(code, inst->Alpha.DestIndex);
178 code->alu.inst[ip].alpha_addr |=
179 (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
182 if (inst->Alpha.OutputWriteMask) {
183 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT;
184 emit->node_flags |= R300_RGBA_OUT;
186 if (inst->Alpha.DepthWriteMask) {
187 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
188 emit->node_flags |= R300_W_OUT;
189 c->code->writes_depth = GL_TRUE;
197 * Finish the current node without advancing to the next one.
199 static GLboolean finish_node(struct r300_emit_state * emit)
201 struct r300_fragment_program_compiler * c = emit->compiler;
202 struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
204 if (code->alu.length == emit->node_first_alu) {
205 /* Generate a single NOP for this node */
206 struct radeon_pair_instruction inst;
207 _mesa_bzero(&inst, sizeof(inst));
208 if (!emit_alu(emit, &inst))
212 unsigned alu_offset = emit->node_first_alu;
213 unsigned alu_end = code->alu.length - alu_offset - 1;
214 unsigned tex_offset = emit->node_first_tex;
215 unsigned tex_end = code->tex.length - tex_offset - 1;
217 if (code->tex.length == emit->node_first_tex) {
218 if (emit->current_node > 0) {
219 error("Node %i has no TEX instructions", emit->current_node);
225 if (emit->current_node == 0)
226 code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
229 /* Write the config register.
230 * Note: The order in which the words for each node are written
231 * is not correct here and needs to be fixed up once we're entirely
234 * Also note that the register specification from AMD is slightly
235 * incorrect in its description of this register. */
236 code->code_addr[emit->current_node] =
237 (alu_offset << R300_ALU_START_SHIFT) |
238 (alu_end << R300_ALU_SIZE_SHIFT) |
239 (tex_offset << R300_TEX_START_SHIFT) |
240 (tex_end << R300_TEX_SIZE_SHIFT) |
248 * Begin a block of texture instructions.
249 * Create the necessary indirection.
251 static GLboolean begin_tex(void* data)
255 if (code->alu.length == emit->node_first_alu &&
256 code->tex.length == emit->node_first_tex) {
260 if (emit->current_node == 3) {
261 error("Too many texture indirections");
265 if (!finish_node(emit))
268 emit->current_node++;
269 emit->node_first_tex = code->tex.length;
270 emit->node_first_alu = code->alu.length;
271 emit->node_flags = 0;
276 static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst)
280 if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
281 error("Too many TEX instructions");
285 GLuint unit = inst->TexSrcUnit;
286 GLuint dest = inst->DestIndex;
289 switch(inst->Opcode) {
290 case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
291 case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
292 case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
293 case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
295 error("Unknown texture opcode %i", inst->Opcode);
299 if (inst->Opcode == RADEON_OPCODE_KIL) {
303 use_temporary(code, dest);
306 use_temporary(code, inst->SrcIndex);
308 code->tex.inst[code->tex.length++] =
309 (inst->SrcIndex << R300_SRC_ADDR_SHIFT) |
310 (dest << R300_DST_ADDR_SHIFT) |
311 (unit << R300_TEX_ID_SHIFT) |
312 (opcode << R300_TEX_INST_SHIFT);
317 static const struct radeon_pair_handler pair_handler = {
318 .EmitPaired = &emit_alu,
319 .EmitTex = &emit_tex,
320 .BeginTexBlock = &begin_tex,
321 .MaxHwTemps = R300_PFS_NUM_TEMP_REGS
325 * Final compilation step: Turn the intermediate radeon_program into
326 * machine-readable instructions.
328 void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
330 struct r300_emit_state emit;
331 struct r300_fragment_program_code *code = &compiler->code->code.r300;
333 memset(&emit, 0, sizeof(emit));
334 emit.compiler = compiler;
336 _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
338 radeonPairProgram(compiler, &pair_handler, &emit);
339 if (compiler->Base.Error)
342 /* Finish the program */
345 code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
347 (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
348 ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
349 (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
350 ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
352 if (emit.current_node < 3) {
353 int shift = 3 - emit.current_node;
355 for(i = 0; i <= emit.current_node; ++i)
356 code->code_addr[shift + i] = code->code_addr[i];
357 for(i = 0; i < shift; ++i)
358 code->code_addr[i] = 0;