From: Marek Olšák Date: Fri, 28 Oct 2011 19:27:22 +0000 (+0200) Subject: r300c/compiler: remove the compiler too X-Git-Tag: 062012170305~3590 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e79aaf000b0cf0a2f6f22695dc8e0acf3d2ce182;p=profile%2Fivi%2Fmesa.git r300c/compiler: remove the compiler too Gallium has a fork of this. --- diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile deleted file mode 100644 index 5aa1332..0000000 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ /dev/null @@ -1,99 +0,0 @@ -# src/mesa/drivers/dri/r300/compiler/Makefile - -TOP = ../../../../../.. -include $(TOP)/configs/current - -LIBNAME = r300compiler - -C_SOURCES = \ - radeon_code.c \ - radeon_compiler.c \ - radeon_compiler_util.c \ - radeon_emulate_branches.c \ - radeon_emulate_loops.c \ - radeon_program.c \ - radeon_program_print.c \ - radeon_opcodes.c \ - radeon_program_alu.c \ - radeon_program_pair.c \ - radeon_program_tex.c \ - radeon_pair_translate.c \ - radeon_pair_schedule.c \ - radeon_pair_regalloc.c \ - radeon_pair_dead_sources.c \ - radeon_dataflow.c \ - radeon_dataflow_deadcode.c \ - radeon_dataflow_swizzles.c \ - radeon_list.c \ - radeon_optimize.c \ - radeon_remove_constants.c \ - radeon_rename_regs.c \ - radeon_variable.c \ - r3xx_fragprog.c \ - r300_fragprog.c \ - r300_fragprog_swizzle.c \ - r300_fragprog_emit.c \ - r500_fragprog.c \ - r500_fragprog_emit.c \ - r3xx_vertprog.c \ - r3xx_vertprog_dump.c \ - \ - memory_pool.c \ - $(TOP)/src/glsl/ralloc.c \ - $(TOP)/src/mesa/program/register_allocate.c - - -### Basic defines ### - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(CPP_SOURCES:.cpp=.o) \ - $(ASM_SOURCES:.S=.o) - -INCLUDES = \ - -I. 
\ - -I$(TOP)/include \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/glsl \ - -I$(TOP)/src/mapi - - -##### TARGETS ##### - -default: depend lib$(LIBNAME).a - -lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current - $(MKLIB) -o $(LIBNAME) -static $(OBJECTS) - -depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null - -# Emacs tags -tags: - etags `find . -name \*.[ch]` `find ../include` - -# Remove .o and backup files -clean: - rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak - -test: default - @$(MAKE) -s -C tests/ - -# Dummy target -install: - @echo -n "" - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ - -.cpp.o: - $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ - - -sinclude depend diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript deleted file mode 100755 index 2c748b6..0000000 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ /dev/null @@ -1,51 +0,0 @@ -Import('*') - -env = env.Clone() -env.Append(CPPPATH = '#/include') -env.Append(CPPPATH = '#/src/mesa') -env.Append(CPPPATH = '#/src/glsl') -env.Append(CPPPATH = '#/src/mapi') - -# temporary fix -env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '') - -r300compiler = env.ConvenienceLibrary( - target = 'r300compiler', - source = [ - 'radeon_code.c', - 'radeon_compiler.c', - 'radeon_compiler_util.c', - 'radeon_program.c', - 'radeon_program_print.c', - 'radeon_opcodes.c', - 'radeon_program_alu.c', - 'radeon_program_pair.c', - 'radeon_program_tex.c', - 'radeon_pair_translate.c', - 'radeon_pair_schedule.c', - 'radeon_pair_regalloc.c', - 'radeon_pair_dead_sources.c', - 'radeon_optimize.c', - 'radeon_remove_constants.c', - 'radeon_rename_regs.c', - 
'radeon_emulate_branches.c', - 'radeon_emulate_loops.c', - 'radeon_dataflow.c', - 'radeon_dataflow_deadcode.c', - 'radeon_dataflow_swizzles.c', - 'radeon_variable.c', - 'radeon_list.c', - 'r3xx_fragprog.c', - 'r300_fragprog.c', - 'r300_fragprog_swizzle.c', - 'r300_fragprog_emit.c', - 'r500_fragprog.c', - 'r500_fragprog_emit.c', - 'r3xx_vertprog.c', - 'r3xx_vertprog_dump.c', - 'memory_pool.c', - '#/src/glsl/ralloc.c', - '#/src/mesa/program/register_allocate.c' - ]) - -Return('r300compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c deleted file mode 100644 index ddcdddf..0000000 --- a/src/mesa/drivers/dri/r300/compiler/memory_pool.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "memory_pool.h" - -#include -#include -#include - - -#define POOL_LARGE_ALLOC 4096 -#define POOL_ALIGN 8 - - -struct memory_block { - struct memory_block * next; -}; - -void memory_pool_init(struct memory_pool * pool) -{ - memset(pool, 0, sizeof(struct memory_pool)); -} - - -void memory_pool_destroy(struct memory_pool * pool) -{ - while(pool->blocks) { - struct memory_block * block = pool->blocks; - pool->blocks = block->next; - free(block); - } -} - -static void refill_pool(struct memory_pool * pool) -{ - unsigned int blocksize = pool->total_allocated; - struct memory_block * newblock; - - if (!blocksize) - blocksize = 2*POOL_LARGE_ALLOC; - - newblock = (struct memory_block*)malloc(blocksize); - newblock->next = pool->blocks; - pool->blocks = newblock; - - pool->head = (unsigned char*)(newblock + 1); - pool->end = ((unsigned char*)newblock) + blocksize; - pool->total_allocated += blocksize; -} - - -void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) -{ - if (bytes < POOL_LARGE_ALLOC) { - void * ptr; - - if (pool->head + bytes > pool->end) - refill_pool(pool); - - assert(pool->head + bytes <= pool->end); - - ptr = pool->head; - - pool->head += bytes; - pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); - - return ptr; - } else { - struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); - - block->next = pool->blocks; - pool->blocks = block; - - return (block + 1); - } -} - - diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h deleted file mode 100644 index 42344d0..0000000 --- a/src/mesa/drivers/dri/r300/compiler/memory_pool.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software 
without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef MEMORY_POOL_H -#define MEMORY_POOL_H - -struct memory_block; - -/** - * Provides a pool of memory that can quickly be allocated from, at the - * cost of being unable to explicitly free one of the allocated blocks. - * Instead, the entire pool can be freed at once. - * - * The idea is to allow one to quickly allocate a flexible amount of - * memory during operations like shader compilation while avoiding - * reference counting headaches. - */ -struct memory_pool { - unsigned char * head; - unsigned char * end; - unsigned int total_allocated; - struct memory_block * blocks; -}; - - -void memory_pool_init(struct memory_pool * pool); -void memory_pool_destroy(struct memory_pool * pool); -void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); - - -/** - * Generic helper for growing an array that has separate size/count - * and reserved counters to accomodate up to num new element. 
- * - * type * Array; - * unsigned int Size; - * unsigned int Reserved; - * - * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k); - * assert(Size + k < Reserved); - * - * \note Size is not changed by this macro. - * - * \warning Array, Size, Reserved have to be lvalues and may be evaluated - * several times. - */ -#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \ - unsigned int _num = (num); \ - if ((size) + _num > (reserved)) { \ - unsigned int newreserve = (reserved) * 2; \ - type * newarray; \ - if (newreserve < _num) \ - newreserve = 4 * _num; /* arbitrary heuristic */ \ - newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \ - memcpy(newarray, (array), (size) * sizeof(type)); \ - (array) = newarray; \ - (reserved) = newreserve; \ - } \ -} while(0) - -#endif /* MEMORY_POOL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c deleted file mode 100644 index deba9ca..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "r300_fragprog.h" - -#include - -#include "../r300_reg.h" - -static void presub_string(char out[10], unsigned int inst) -{ - switch(inst & 0x600000){ - case R300_ALU_SRCP_1_MINUS_2_SRC0: - sprintf(out, "bias"); - break; - case R300_ALU_SRCP_SRC1_MINUS_SRC0: - sprintf(out, "sub"); - break; - case R300_ALU_SRCP_SRC1_PLUS_SRC0: - sprintf(out, "add"); - break; - case R300_ALU_SRCP_1_MINUS_SRC0: - sprintf(out, "inv "); - break; - } -} - -static int get_msb(unsigned int bit, unsigned int r400_ext_addr) -{ - return (r400_ext_addr & bit) ? 1 << 5 : 0; -} - -/* just some random things... 
*/ -void r300FragmentProgramDump(struct radeon_compiler *c, void *user) -{ - struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; - struct r300_fragment_program_code *code = &compiler->code->code.r300; - int n, i, j; - static int pc = 0; - - fprintf(stderr, "pc=%d*************************************\n", pc++); - - fprintf(stderr, "Hardware program\n"); - fprintf(stderr, "----------------\n"); - if (c->is_r400) { - fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); - } - - for (n = 0; n <= (code->config & 3); n++) { - uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; - unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + - (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); - unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + - (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); - int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; - int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; - - fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " - "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, - alu_offset, tex_offset, alu_end, tex_end, code_addr); - - if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { - fprintf(stderr, " TEX:\n"); - for (i = tex_offset; - i <= tex_offset + tex_end; - ++i) { - const char *instr; - - switch ((code->tex. - inst[i] >> R300_TEX_INST_SHIFT) & - 15) { - case R300_TEX_OP_LD: - instr = "TEX"; - break; - case R300_TEX_OP_KIL: - instr = "KIL"; - break; - case R300_TEX_OP_TXP: - instr = "TXP"; - break; - case R300_TEX_OP_TXB: - instr = "TXB"; - break; - default: - instr = "UNKNOWN"; - } - - fprintf(stderr, - " %s t%i, %c%i, texture[%i] (%08x)\n", - instr, - (code->tex. - inst[i] >> R300_DST_ADDR_SHIFT) & 31, - 't', - (code->tex. - inst[i] >> R300_SRC_ADDR_SHIFT) & 31, - (code->tex. 
- inst[i] & R300_TEX_ID_MASK) >> - R300_TEX_ID_SHIFT, - code->tex.inst[i]); - } - } - - for (i = alu_offset; - i <= alu_offset + alu_end; ++i) { - char srcc[4][10], dstc[20]; - char srca[4][10], dsta[20]; - char argc[3][20]; - char arga[3][20]; - char flags[5], tmp[10]; - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].rgb_addr >> (j * 6); - int rega = code->alu.inst[i].alpha_addr >> (j * 6); - int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), - code->alu.inst[i].r400_ext_addr); - int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), - code->alu.inst[i].r400_ext_addr); - - sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', (regc & 31) | msbc); - sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', (rega & 31) | msba); - } - - dstc[0] = 0; - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); - if (flags[0] != 0) { - unsigned int msb = get_msb( - R400_ADDRD_EXT_RGB_MSB_BIT, - code->alu.inst[i].r400_ext_addr); - - sprintf(dstc, "t%i.%s ", - ((code->alu.inst[i]. - rgb_addr >> R300_ALU_DSTC_SHIFT) - & 31) | msb, - flags); - } - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", - (code->alu.inst[i]. - rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(tmp, "o%i.%s", - (code->alu.inst[i]. - rgb_addr >> 29) & 3, - flags); - strcat(dstc, tmp); - } - /* Presub */ - presub_string(srcc[3], code->alu.inst[i].rgb_inst); - presub_string(srca[3], code->alu.inst[i].alpha_inst); - - dsta[0] = 0; - if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { - unsigned int msb = get_msb( - R400_ADDRD_EXT_A_MSB_BIT, - code->alu.inst[i].r400_ext_addr); - sprintf(dsta, "t%i.w ", - ((code->alu.inst[i]. 
- alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) - | msb); - } - if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { - sprintf(tmp, "o%i.w ", - (code->alu.inst[i]. - alpha_addr >> 25) & 3); - strcat(dsta, tmp); - } - if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { - strcat(dsta, "Z"); - } - - fprintf(stderr, - "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n" - " w: %3s %3s %3s %5s-> %-20s (%08x)\n", i, - srcc[0], srcc[1], srcc[2], srcc[3], dstc, - code->alu.inst[i].rgb_addr, srca[0], srca[1], - srca[2], srca[3], dsta, - code->alu.inst[i].alpha_addr); - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].rgb_inst >> (j * 7); - int rega = code->alu.inst[i].alpha_inst >> (j * 7); - int d; - char buf[20]; - - d = regc & 31; - if (d < 12) { - switch (d % 4) { - case R300_ALU_ARGC_SRC0C_XYZ: - sprintf(buf, "%s.xyz", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_XXX: - sprintf(buf, "%s.xxx", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_YYY: - sprintf(buf, "%s.yyy", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_ZZZ: - sprintf(buf, "%s.zzz", - srcc[d / 4]); - break; - } - } else if (d < 15) { - sprintf(buf, "%s.www", srca[d - 12]); - } else if (d < 20 ) { - switch(d) { - case R300_ALU_ARGC_SRCP_XYZ: - sprintf(buf, "srcp.xyz"); - break; - case R300_ALU_ARGC_SRCP_XXX: - sprintf(buf, "srcp.xxx"); - break; - case R300_ALU_ARGC_SRCP_YYY: - sprintf(buf, "srcp.yyy"); - break; - case R300_ALU_ARGC_SRCP_ZZZ: - sprintf(buf, "srcp.zzz"); - break; - case R300_ALU_ARGC_SRCP_WWW: - sprintf(buf, "srcp.www"); - break; - } - } else if (d == 20) { - sprintf(buf, "0.0"); - } else if (d == 21) { - sprintf(buf, "1.0"); - } else if (d == 22) { - sprintf(buf, "0.5"); - } else if (d >= 23 && d < 32) { - d -= 23; - switch (d / 3) { - case 0: - sprintf(buf, "%s.yzx", - srcc[d % 3]); - break; - case 1: - sprintf(buf, "%s.zxy", - srcc[d % 3]); - break; - case 2: - sprintf(buf, "%s.Wzy", - srcc[d % 3]); - break; - } - } else { - sprintf(buf, "%i", d); - } - - 
sprintf(argc[j], "%s%s%s%s", - (regc & 32) ? "-" : "", - (regc & 64) ? "|" : "", - buf, (regc & 64) ? "|" : ""); - - d = rega & 31; - if (d < 9) { - sprintf(buf, "%s.%c", srcc[d / 3], - 'x' + (char)(d % 3)); - } else if (d < 12) { - sprintf(buf, "%s.w", srca[d - 9]); - } else if (d < 16) { - switch(d) { - case R300_ALU_ARGA_SRCP_X: - sprintf(buf, "srcp.x"); - break; - case R300_ALU_ARGA_SRCP_Y: - sprintf(buf, "srcp.y"); - break; - case R300_ALU_ARGA_SRCP_Z: - sprintf(buf, "srcp.z"); - break; - case R300_ALU_ARGA_SRCP_W: - sprintf(buf, "srcp.w"); - break; - } - } else if (d == 16) { - sprintf(buf, "0.0"); - } else if (d == 17) { - sprintf(buf, "1.0"); - } else if (d == 18) { - sprintf(buf, "0.5"); - } else { - sprintf(buf, "%i", d); - } - - sprintf(arga[j], "%s%s%s%s", - (rega & 32) ? "-" : "", - (rega & 64) ? "|" : "", - buf, (rega & 64) ? "|" : ""); - } - - fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n" - " w: %8s %8s %8s op: %08x\n", - argc[0], argc[1], argc[2], - code->alu.inst[i].rgb_inst, - code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ? - "NOP" : "", - arga[0], arga[1],arga[2], - code->alu.inst[i].alpha_inst); - } - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h deleted file mode 100644 index 0c88bab..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/* - * Authors: - * Ben Skeggs - * Jerome Glisse - */ -#ifndef __R300_FRAGPROG_H_ -#define __R300_FRAGPROG_H_ - -#include "radeon_compiler.h" -#include "radeon_program.h" - - -extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); - -extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user); - -#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c deleted file mode 100644 index e6fd1fd..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ /dev/null @@ -1,536 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * \file - * - * Emit the r300_fragment_program_code that can be understood by the hardware. - * Input is a pre-transformed radeon_program. - * - * \author Ben Skeggs - * - * \author Jerome Glisse - */ - -#include "r300_fragprog.h" - -#include "../r300_reg.h" - -#include "radeon_program_pair.h" -#include "r300_fragprog_swizzle.h" - - -struct r300_emit_state { - struct r300_fragment_program_compiler * compiler; - - unsigned current_node : 2; - unsigned node_first_tex : 8; - unsigned node_first_alu : 8; - uint32_t node_flags; -}; - -#define PROG_CODE \ - struct r300_fragment_program_compiler *c = emit->compiler; \ - struct r300_fragment_program_code *code = &c->code->code.r300 - -#define error(fmt, args...) 
do { \ - rc_error(&c->Base, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - } while(0) - -static unsigned int get_msbs_alu(unsigned int bits) -{ - return (bits >> 6) & 0x7; -} - -/** - * @param lsbs The number of least significant bits - */ -static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) -{ - return (bits >> lsbs) & 0x15; -} - -#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) - -/** - * Mark a temporary register as used. - */ -static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) -{ - if (index > code->pixsize) - code->pixsize = index; -} - -static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) -{ - if (!src.Used) - return 0; - - if (src.File == RC_FILE_CONSTANT) { - return src.Index | (1 << 5); - } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { - use_temporary(code, src.Index); - return src.Index & 0x1f; - } - - return 0; -} - - -static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) -{ - switch(opcode) { - case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; - case RC_OPCODE_CND: return R300_ALU_OUTC_CND; - case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; - case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; - case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; - default: - error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); - /* fall through */ - case RC_OPCODE_NOP: - /* fall through */ - case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; - case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; - case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; - case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; - } -} - -static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) -{ - switch(opcode) { - case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; - case RC_OPCODE_CND: return R300_ALU_OUTA_CND; - case RC_OPCODE_DP3: 
return R300_ALU_OUTA_DP4; - case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; - case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; - case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; - case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; - default: - error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); - /* fall through */ - case RC_OPCODE_NOP: - /* fall through */ - case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; - case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; - case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; - case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; - case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; - } -} - -/** - * Emit one paired ALU instruction. - */ -static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) -{ - int ip; - int j; - PROG_CODE; - - if (code->alu.length >= c->Base.max_alu_insts) { - error("Too many ALU instructions"); - return 0; - } - - ip = code->alu.length++; - - code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); - code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); - - for(j = 0; j < 3; ++j) { - /* Set the RGB address */ - unsigned int src = use_source(code, inst->RGB.Src[j]); - unsigned int arg; - if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) - code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); - - code->alu.inst[ip].rgb_addr |= src << (6*j); - - /* Set the Alpha address */ - src = use_source(code, inst->Alpha.Src[j]); - if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) - code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); - - code->alu.inst[ip].alpha_addr |= src << (6*j); - - arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); - arg |= inst->RGB.Arg[j].Abs << 6; - arg |= inst->RGB.Arg[j].Negate << 5; - code->alu.inst[ip].rgb_inst |= arg << (7*j); - - arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); - arg |= inst->Alpha.Arg[j].Abs << 6; - arg 
|= inst->Alpha.Arg[j].Negate << 5; - code->alu.inst[ip].alpha_inst |= arg << (7*j); - } - - /* Presubtract */ - if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { - case RC_PRESUB_BIAS: - code->alu.inst[ip].rgb_inst |= - R300_ALU_SRCP_1_MINUS_2_SRC0; - break; - case RC_PRESUB_ADD: - code->alu.inst[ip].rgb_inst |= - R300_ALU_SRCP_SRC1_PLUS_SRC0; - break; - case RC_PRESUB_SUB: - code->alu.inst[ip].rgb_inst |= - R300_ALU_SRCP_SRC1_MINUS_SRC0; - break; - case RC_PRESUB_INV: - code->alu.inst[ip].rgb_inst |= - R300_ALU_SRCP_1_MINUS_SRC0; - break; - default: - break; - } - } - - if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { - case RC_PRESUB_BIAS: - code->alu.inst[ip].alpha_inst |= - R300_ALU_SRCP_1_MINUS_2_SRC0; - break; - case RC_PRESUB_ADD: - code->alu.inst[ip].alpha_inst |= - R300_ALU_SRCP_SRC1_PLUS_SRC0; - break; - case RC_PRESUB_SUB: - code->alu.inst[ip].alpha_inst |= - R300_ALU_SRCP_SRC1_MINUS_SRC0; - break; - case RC_PRESUB_INV: - code->alu.inst[ip].alpha_inst |= - R300_ALU_SRCP_1_MINUS_SRC0; - break; - default: - break; - } - } - - if (inst->RGB.Saturate) - code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; - if (inst->Alpha.Saturate) - code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; - - if (inst->RGB.WriteMask) { - use_temporary(code, inst->RGB.DestIndex); - if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) - code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; - code->alu.inst[ip].rgb_addr |= - ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | - (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); - } - if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].rgb_addr |= - (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | - R300_RGB_TARGET(inst->RGB.Target); - emit->node_flags |= R300_RGBA_OUT; - } - - if (inst->Alpha.WriteMask) { - use_temporary(code, inst->Alpha.DestIndex); - if (inst->Alpha.DestIndex >= 
R300_PFS_NUM_TEMP_REGS) - code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; - code->alu.inst[ip].alpha_addr |= - ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | - R300_ALU_DSTA_REG; - } - if (inst->Alpha.OutputWriteMask) { - code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | - R300_ALPHA_TARGET(inst->Alpha.Target); - emit->node_flags |= R300_RGBA_OUT; - } - if (inst->Alpha.DepthWriteMask) { - code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; - emit->node_flags |= R300_W_OUT; - c->code->writes_depth = 1; - } - if (inst->Nop) - code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP; - - return 1; -} - - -/** - * Finish the current node without advancing to the next one. - */ -static int finish_node(struct r300_emit_state * emit) -{ - struct r300_fragment_program_compiler * c = emit->compiler; - struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; - unsigned alu_offset; - unsigned alu_end; - unsigned tex_offset; - unsigned tex_end; - - unsigned int alu_offset_msbs, alu_end_msbs; - - if (code->alu.length == emit->node_first_alu) { - /* Generate a single NOP for this node */ - struct rc_pair_instruction inst; - memset(&inst, 0, sizeof(inst)); - if (!emit_alu(emit, &inst)) - return 0; - } - - alu_offset = emit->node_first_alu; - alu_end = code->alu.length - alu_offset - 1; - tex_offset = emit->node_first_tex; - tex_end = code->tex.length - tex_offset - 1; - - if (code->tex.length == emit->node_first_tex) { - if (emit->current_node > 0) { - error("Node %i has no TEX instructions", emit->current_node); - return 0; - } - - tex_end = 0; - } else { - if (emit->current_node == 0) - code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; - } - - /* Write the config register. - * Note: The order in which the words for each node are written - * is not correct here and needs to be fixed up once we're entirely - * done - * - * Also note that the register specification from AMD is slightly - * incorrect in its description of this register. 
*/ - code->code_addr[emit->current_node] = - ((alu_offset << R300_ALU_START_SHIFT) - & R300_ALU_START_MASK) - | ((alu_end << R300_ALU_SIZE_SHIFT) - & R300_ALU_SIZE_MASK) - | ((tex_offset << R300_TEX_START_SHIFT) - & R300_TEX_START_MASK) - | ((tex_end << R300_TEX_SIZE_SHIFT) - & R300_TEX_SIZE_MASK) - | emit->node_flags - | (get_msbs_tex(tex_offset, 5) - << R400_TEX_START_MSB_SHIFT) - | (get_msbs_tex(tex_end, 5) - << R400_TEX_SIZE_MSB_SHIFT) - ; - - /* Write r400 extended instruction fields. These will be ignored on - * r300 cards. */ - alu_offset_msbs = get_msbs_alu(alu_offset); - alu_end_msbs = get_msbs_alu(alu_end); - switch(emit->current_node) { - case 0: - code->r400_code_offset_ext |= - alu_offset_msbs << R400_ALU_START3_MSB_SHIFT - | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; - break; - case 1: - code->r400_code_offset_ext |= - alu_offset_msbs << R400_ALU_START2_MSB_SHIFT - | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; - break; - case 2: - code->r400_code_offset_ext |= - alu_offset_msbs << R400_ALU_START1_MSB_SHIFT - | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; - break; - case 3: - code->r400_code_offset_ext |= - alu_offset_msbs << R400_ALU_START0_MSB_SHIFT - | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; - break; - } - return 1; -} - - -/** - * Begin a block of texture instructions. - * Create the necessary indirection. 
- */ -static int begin_tex(struct r300_emit_state * emit) -{ - PROG_CODE; - - if (code->alu.length == emit->node_first_alu && - code->tex.length == emit->node_first_tex) { - return 1; - } - - if (emit->current_node == 3) { - error("Too many texture indirections"); - return 0; - } - - if (!finish_node(emit)) - return 0; - - emit->current_node++; - emit->node_first_tex = code->tex.length; - emit->node_first_alu = code->alu.length; - emit->node_flags = 0; - return 1; -} - - -static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) -{ - unsigned int unit; - unsigned int dest; - unsigned int opcode; - PROG_CODE; - - if (code->tex.length >= emit->compiler->Base.max_tex_insts) { - error("Too many TEX instructions"); - return 0; - } - - unit = inst->U.I.TexSrcUnit; - dest = inst->U.I.DstReg.Index; - - switch(inst->U.I.Opcode) { - case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; - case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; - case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; - case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; - default: - error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); - return 0; - } - - if (inst->U.I.Opcode == RC_OPCODE_KIL) { - unit = 0; - dest = 0; - } else { - use_temporary(code, dest); - } - - use_temporary(code, inst->U.I.SrcReg[0].Index); - - code->tex.inst[code->tex.length++] = - ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) - & R300_SRC_ADDR_MASK) - | ((dest << R300_DST_ADDR_SHIFT) - & R300_DST_ADDR_MASK) - | (unit << R300_TEX_ID_SHIFT) - | (opcode << R300_TEX_INST_SHIFT) - | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? - R400_SRC_ADDR_EXT_BIT : 0) - | (dest >= R300_PFS_NUM_TEMP_REGS ? - R400_DST_ADDR_EXT_BIT : 0) - ; - return 1; -} - - -/** - * Final compilation step: Turn the intermediate radeon_program into - * machine-readable instructions. 
- */ -void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) -{ - struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; - struct r300_emit_state emit; - struct r300_fragment_program_code *code = &compiler->code->code.r300; - unsigned int tex_end; - - memset(&emit, 0, sizeof(emit)); - emit.compiler = compiler; - - memset(code, 0, sizeof(struct r300_fragment_program_code)); - - for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; - inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; - inst = inst->Next) { - if (inst->Type == RC_INSTRUCTION_NORMAL) { - if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { - begin_tex(&emit); - continue; - } - - emit_tex(&emit, inst); - } else { - emit_alu(&emit, &inst->U.P); - } - } - - if (code->pixsize >= compiler->Base.max_temp_regs) - rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); - - if (compiler->Base.Error) - return; - - /* Finish the program */ - finish_node(&emit); - - code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ - - /* Set r400 extended instruction fields. These values will be ignored - * on r300 cards. */ - code->r400_code_offset_ext |= - (get_msbs_alu(0) - << R400_ALU_OFFSET_MSB_SHIFT) - | (get_msbs_alu(code->alu.length - 1) - << R400_ALU_SIZE_MSB_SHIFT); - - tex_end = code->tex.length ? 
code->tex.length - 1 : 0; - code->code_offset = - ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) - & R300_PFS_CNTL_ALU_OFFSET_MASK) - | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) - & R300_PFS_CNTL_ALU_END_MASK) - | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) - & R300_PFS_CNTL_TEX_OFFSET_MASK) - | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) - & R300_PFS_CNTL_TEX_END_MASK) - | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) - | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) - ; - - if (emit.current_node < 3) { - int shift = 3 - emit.current_node; - int i; - for(i = emit.current_node; i >= 0; --i) - code->code_addr[shift + i] = code->code_addr[i]; - for(i = 0; i < shift; ++i) - code->code_addr[i] = 0; - } - - if (code->pixsize >= R300_PFS_NUM_TEMP_REGS - || code->alu.length > R300_PFS_MAX_ALU_INST - || code->tex.length > R300_PFS_MAX_TEX_INST) { - - code->r390_mode = 1; - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c deleted file mode 100644 index b7bca8c..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * Utilities to deal with the somewhat odd restriction on R300 fragment - * program swizzles. - */ - -#include "r300_fragprog_swizzle.h" - -#include - -#include "../r300_reg.h" -#include "radeon_compiler.h" - -#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) - -struct swizzle_data { - unsigned int hash; /**< swizzle value this matches */ - unsigned int base; /**< base value for hw swizzle */ - unsigned int stride; /**< difference in base between arg0/1/2 */ - unsigned int srcp_stride; /**< difference in base between arg0/scrp */ -}; - -static const struct swizzle_data native_swizzles[] = { - {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15}, - {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15}, - {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15}, - {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15}, - {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7}, - {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0}, - {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0}, - {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, - {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0} -}; - -static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); - -/** - * Find a native RGB swizzle that matches the given swizzle. - * Returns 0 if none found. 
- */ -static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) -{ - int i, comp; - - for(i = 0; i < num_native_swizzles; ++i) { - const struct swizzle_data* sd = &native_swizzles[i]; - for(comp = 0; comp < 3; ++comp) { - unsigned int swz = GET_SWZ(swizzle, comp); - if (swz == RC_SWIZZLE_UNUSED) - continue; - if (swz != GET_SWZ(sd->hash, comp)) - break; - } - if (comp == 3) - return sd; - } - - return 0; -} - -/** - * Determines if the given swizzle is valid for r300/r400. In most situations - * it is better to use r300_swizzle_is_native() which can be accesed via - * struct radeon_compiler *c; c->SwizzleCaps->IsNative(). - */ -int r300_swizzle_is_native_basic(unsigned int swizzle) -{ - if(lookup_native_swizzle(swizzle)) - return 1; - else - return 0; -} - -/** - * Check whether the given instruction supports the swizzle and negate - * combinations in the given source register. - */ -static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) -{ - const struct swizzle_data* sd; - unsigned int relevant; - int j; - - if (opcode == RC_OPCODE_KIL || - opcode == RC_OPCODE_TEX || - opcode == RC_OPCODE_TXB || - opcode == RC_OPCODE_TXP) { - if (reg.Abs || reg.Negate) - return 0; - - for(j = 0; j < 4; ++j) { - unsigned int swz = GET_SWZ(reg.Swizzle, j); - if (swz == RC_SWIZZLE_UNUSED) - continue; - if (swz != j) - return 0; - } - - return 1; - } - - relevant = 0; - - for(j = 0; j < 3; ++j) - if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) - relevant |= 1 << j; - - if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return 0; - - sd = lookup_native_swizzle(reg.Swizzle); - if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) - return 0; - - return 1; -} - - -static void r300_swizzle_split( - struct rc_src_register src, unsigned int mask, - struct rc_swizzle_split * split) -{ - split->NumPhases = 0; - - while(mask) { - unsigned int best_matchcount = 0; - unsigned int best_matchmask = 0; - int i, comp; - 
- for(i = 0; i < num_native_swizzles; ++i) { - const struct swizzle_data *sd = &native_swizzles[i]; - unsigned int matchcount = 0; - unsigned int matchmask = 0; - for(comp = 0; comp < 3; ++comp) { - unsigned int swz; - if (!GET_BIT(mask, comp)) - continue; - swz = GET_SWZ(src.Swizzle, comp); - if (swz == RC_SWIZZLE_UNUSED) - continue; - if (swz == GET_SWZ(sd->hash, comp)) { - /* check if the negate bit of current component - * is the same for already matched components */ - if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) - continue; - - matchcount++; - matchmask |= 1 << comp; - } - } - if (matchcount > best_matchcount) { - best_matchcount = matchcount; - best_matchmask = matchmask; - if (matchmask == (mask & RC_MASK_XYZ)) - break; - } - } - - if (mask & RC_MASK_W) - best_matchmask |= RC_MASK_W; - - split->Phase[split->NumPhases++] = best_matchmask; - mask &= ~best_matchmask; - } -} - -struct rc_swizzle_caps r300_swizzle_caps = { - .IsNative = r300_swizzle_is_native, - .Split = r300_swizzle_split -}; - - -/** - * Translate an RGB (XYZ) swizzle into the hardware code for the given - * instruction source. - */ -unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) -{ - const struct swizzle_data* sd = lookup_native_swizzle(swizzle); - - if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { - fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); - return 0; - } - - if (src == RC_PAIR_PRESUB_SRC) { - return sd->base + sd->srcp_stride; - } else { - return sd->base + src*sd->stride; - } -} - - -/** - * Translate an Alpha (W) swizzle into the hardware code for the given - * instruction source. 
- */ -unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) -{ - unsigned int swz = GET_SWZ(swizzle, 0); - if (src == RC_PAIR_PRESUB_SRC) { - return R300_ALU_ARGA_SRCP_X + swz; - } - if (swz < 3) - return swz + 3*src; - - switch(swz) { - case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; - case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; - case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; - case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; - default: return R300_ALU_ARGA_ONE; - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h deleted file mode 100644 index f2635be..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __R300_FRAGPROG_SWIZZLE_H_ -#define __R300_FRAGPROG_SWIZZLE_H_ - -#include "radeon_swizzle.h" - -extern struct rc_swizzle_caps r300_swizzle_caps; - -unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); -unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); -int r300_swizzle_is_native_basic(unsigned int swizzle); - -#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c deleted file mode 100644 index bb6c010..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "radeon_compiler.h" - -#include - -#include "radeon_compiler_util.h" -#include "radeon_dataflow.h" -#include "radeon_emulate_branches.h" -#include "radeon_emulate_loops.h" -#include "radeon_program_alu.h" -#include "radeon_program_tex.h" -#include "radeon_rename_regs.h" -#include "radeon_remove_constants.h" -#include "r300_fragprog.h" -#include "r300_fragprog_swizzle.h" -#include "r500_fragprog.h" - - -static void dataflow_outputs_mark_use(void * userdata, void * data, - void (*callback)(void *, unsigned int, unsigned int)) -{ - struct r300_fragment_program_compiler * c = userdata; - callback(data, c->OutputColor[0], RC_MASK_XYZW); - callback(data, c->OutputColor[1], RC_MASK_XYZW); - callback(data, c->OutputColor[2], RC_MASK_XYZW); - callback(data, c->OutputColor[3], RC_MASK_XYZW); - callback(data, c->OutputDepth, RC_MASK_W); -} - -static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) -{ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct rc_instruction *rci; - - for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { - struct rc_sub_instruction * inst = &rci->U.I; - unsigned i; - const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); - - if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) - continue; - - if (inst->DstReg.WriteMask & RC_MASK_Z) { - inst->DstReg.WriteMask = RC_MASK_W; - } else { - inst->DstReg.WriteMask = 0; - continue; - } - - if (!info->IsComponentwise) { - continue; - } - - for (i = 0; i < info->NumSrcRegs; i++) { - inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); - } - } -} - -static int 
radeon_saturate_output( - struct radeon_compiler * c, - struct rc_instruction * inst, - void* data) -{ - const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); - - if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT) - return 0; - - inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; - return 1; -} - -void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) -{ - int is_r500 = c->Base.is_r500; - int opt = !c->Base.disable_optimizations; - int sat_out = c->state.frag_clamp; - - /* Lists of instruction transformations. */ - struct radeon_program_transformation saturate_output[] = { - { &radeon_saturate_output, c }, - { 0, 0 } - }; - - struct radeon_program_transformation rewrite_tex[] = { - { &radeonTransformTEX, c }, - { 0, 0 } - }; - - struct radeon_program_transformation rewrite_if[] = { - { &r500_transform_IF, 0 }, - {0, 0} - }; - - struct radeon_program_transformation native_rewrite_r500[] = { - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 }, - { 0, 0 } - }; - - struct radeon_program_transformation native_rewrite_r300[] = { - { &radeonTransformALU, 0 }, - { &r300_transform_trig_simple, 0 }, - { 0, 0 } - }; - - /* List of compiler passes. */ - struct radeon_compiler_pass fs_list[] = { - /* NAME DUMP PREDICATE FUNCTION PARAM */ - {"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL}, - /* This transformation needs to be done before any of the IF - * instructions are modified. 
*/ - {"transform KILP", 1, 1, rc_transform_KILP, NULL}, - {"unroll loops", 1, is_r500, rc_unroll_loops, NULL}, - {"transform loops", 1, !is_r500, rc_transform_loops, NULL}, - {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, - {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output}, - {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, - {"transform IF", 1, is_r500, rc_local_transform, rewrite_if}, - {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, - {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, - {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use}, - {"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, - {"dataflow optimize", 1, opt, rc_optimize, NULL}, - {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, - {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, - /* This pass makes it easier for the scheduler to group TEX - * instructions and reduces the chances of creating too - * many texture indirections.*/ - {"register rename", 1, !is_r500, rc_rename_regs, NULL}, - {"pair translate", 1, 1, rc_pair_translate, NULL}, - {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, - {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, - {"register allocation", 1, 1, rc_pair_regalloc, &opt}, - {"final code validation", 0, 1, rc_validate_final_shader, NULL}, - {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, - {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, - {"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL}, - {"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL}, - {NULL, 0, 0, NULL, NULL} - }; - - c->Base.type = RC_FRAGMENT_PROGRAM; - c->Base.SwizzleCaps = c->Base.is_r500 ? 
&r500_swizzle_caps : &r300_swizzle_caps; - - rc_run_compiler(&c->Base, fs_list); - - rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); -} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c deleted file mode 100644 index 654f9a0..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ /dev/null @@ -1,1045 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "radeon_compiler.h" - -#include - -#include "../r300_reg.h" - -#include "radeon_compiler_util.h" -#include "radeon_dataflow.h" -#include "radeon_program_alu.h" -#include "radeon_swizzle.h" -#include "radeon_emulate_branches.h" -#include "radeon_emulate_loops.h" -#include "radeon_remove_constants.h" - -struct loop { - int BgnLoop; - -}; - -/* - * Take an already-setup and valid source then swizzle it appropriately to - * obtain a constant ZERO or ONE source. - */ -#define __CONST(x, y) \ - (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_src_class(vpi->SrcReg[x].File), \ - RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) - - -static unsigned long t_dst_mask(unsigned int mask) -{ - /* RC_MASK_* is equivalent to VSF_FLAG_* */ - return mask & RC_MASK_XYZW; -} - -static unsigned long t_dst_class(rc_register_file file) -{ - switch (file) { - default: - fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); - /* fall-through */ - case RC_FILE_TEMPORARY: - return PVS_DST_REG_TEMPORARY; - case RC_FILE_OUTPUT: - return PVS_DST_REG_OUT; - case RC_FILE_ADDRESS: - return PVS_DST_REG_A0; - } -} - -static unsigned long t_dst_index(struct r300_vertex_program_code *vp, - struct rc_dst_register *dst) -{ - if (dst->File == RC_FILE_OUTPUT) - return vp->outputs[dst->Index]; - - return dst->Index; -} - -static unsigned long t_src_class(rc_register_file file) -{ - switch (file) { - default: - fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); - /* fall-through */ - case RC_FILE_NONE: - case RC_FILE_TEMPORARY: - return PVS_SRC_REG_TEMPORARY; - case RC_FILE_INPUT: - return PVS_SRC_REG_INPUT; - case RC_FILE_CONSTANT: - return PVS_SRC_REG_CONSTANT; - } -} - -static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) -{ - unsigned long aclass = t_src_class(a.File); - unsigned long bclass = t_src_class(b.File); - - if (aclass != bclass) - return 0; - if 
(aclass == PVS_SRC_REG_TEMPORARY) - return 0; - - if (a.RelAddr || b.RelAddr) - return 1; - if (a.Index != b.Index) - return 1; - - return 0; -} - -static inline unsigned long t_swizzle(unsigned int swizzle) -{ - /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; -} - -static unsigned long t_src_index(struct r300_vertex_program_code *vp, - struct rc_src_register *src) -{ - if (src->File == RC_FILE_INPUT) { - assert(vp->inputs[src->Index] != -1); - return vp->inputs[src->Index]; - } else { - if (src->Index < 0) { - fprintf(stderr, - "negative offsets for indirect addressing do not work.\n"); - return 0; - } - return src->Index; - } -} - -/* these two functions should probably be merged... */ - -static unsigned long t_src(struct r300_vertex_program_code *vp, - struct rc_src_register *src) -{ - /* src->Negate uses the RC_MASK_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->Negate) | - (src->RelAddr << 4) | (src->Abs << 3); -} - -static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, - struct rc_src_register *src) -{ - /* src->Negate uses the RC_MASK_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_src_class(src->File), - src->Negate ? 
RC_MASK_XYZW : RC_MASK_NONE) | - (src->RelAddr << 4) | (src->Abs << 3); -} - -static int valid_dst(struct r300_vertex_program_code *vp, - struct rc_dst_register *dst) -{ - if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { - return 0; - } else if (dst->File == RC_FILE_ADDRESS) { - assert(dst->Index == 0); - } - - return 1; -} - -static void ei_vector1(struct r300_vertex_program_code *vp, - unsigned int hw_opcode, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - 0, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, RC_SWIZZLE_ZERO); - inst[3] = __CONST(0, RC_SWIZZLE_ZERO); -} - -static void ei_vector2(struct r300_vertex_program_code *vp, - unsigned int hw_opcode, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - 0, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = t_src(vp, &vpi->SrcReg[1]); - inst[3] = __CONST(1, RC_SWIZZLE_ZERO); -} - -static void ei_math1(struct r300_vertex_program_code *vp, - unsigned int hw_opcode, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - 1, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, RC_SWIZZLE_ZERO); - inst[3] = __CONST(0, RC_SWIZZLE_ZERO); -} - -static void ei_lit(struct r300_vertex_program_code *vp, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - - inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - 1, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - 
/* NOTE: Users swizzling might not work. */ - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y - t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | - (vpi->SrcReg[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X - t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | - (vpi->SrcReg[0].RelAddr << 4); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W - t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | - (vpi->SrcReg[0].RelAddr << 4); -} - -static void ei_mad(struct r300_vertex_program_code *vp, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - unsigned int i; - /* Remarks about hardware limitations of MAD - * (please preserve this comment, as this information is _NOT_ - * in the documentation provided by AMD). - * - * As described in the documentation, MAD with three unique temporary - * source registers requires the use of the macro version. - * - * However (and this is not mentioned in the documentation), apparently - * the macro version is _NOT_ a full superset of the normal version. - * In particular, the macro version does not always work when relative - * addressing is used in the source operands. 
- * - * This limitation caused incorrect rendering in Sauerbraten's OpenGL - * assembly shader path when using medium quality animations - * (i.e. animations with matrix blending instead of quaternion blending). - * - * Unfortunately, I (nha) have been unable to extract a Piglit regression - * test for this issue - for some reason, it is possible to have vertex - * programs whose prefix is *exactly* the same as the prefix of the - * offending program in Sauerbraten up to the offending instruction - * without causing any trouble. - * - * Bottom line: Only use the macro version only when really necessary; - * according to AMD docs, this should improve performance by one clock - * as a nice side bonus. - */ - if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && - vpi->SrcReg[1].File == RC_FILE_TEMPORARY && - vpi->SrcReg[2].File == RC_FILE_TEMPORARY && - vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && - vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && - vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - 0, - 1, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - } else { - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - 0, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - /* Arguments with constant swizzles still count as a unique - * temporary, so we should make sure these arguments share a - * register index with one of the other arguments. 
*/ - for (i = 0; i < 3; i++) { - unsigned int j; - if (vpi->SrcReg[i].File != RC_FILE_NONE) - continue; - - for (j = 0; j < 3; j++) { - if (i != j) { - vpi->SrcReg[i].Index = - vpi->SrcReg[j].Index; - break; - } - } - } - } - inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = t_src(vp, &vpi->SrcReg[1]); - inst[3] = t_src(vp, &vpi->SrcReg[2]); -} - -static void ei_pow(struct r300_vertex_program_code *vp, - struct rc_sub_instruction *vpi, - unsigned int * inst) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - 1, - 0, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, RC_SWIZZLE_ZERO); - inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); -} - -static void mark_write(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) -{ - unsigned int * writemasks = userdata; - - if (file != RC_FILE_TEMPORARY) - return; - - if (index >= R300_VS_MAX_TEMPS) - return; - - writemasks[index] |= mask; -} - -static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler) -{ - return PVS_SRC_OPERAND(compiler->PredicateIndex, - t_swizzle(RC_SWIZZLE_ZERO), - t_swizzle(RC_SWIZZLE_ZERO), - t_swizzle(RC_SWIZZLE_ZERO), - t_swizzle(RC_SWIZZLE_W), - t_src_class(RC_FILE_TEMPORARY), - 0); -} - -static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler, - unsigned int hw_opcode, int is_math) -{ - return PVS_OP_DST_OPERAND(hw_opcode, - is_math, - 0, - compiler->PredicateIndex, - RC_MASK_W, - t_dst_class(RC_FILE_TEMPORARY)); - -} - -static void ei_if(struct r300_vertex_program_compiler * compiler, - struct rc_instruction *rci, - unsigned int * inst, - unsigned int branch_depth) -{ - unsigned int predicate_opcode; - int is_math = 0; - - if (!compiler->Base.is_r500) { - rc_error(&compiler->Base,"Opcode IF not supported\n"); - return; - } - - /* Reserve a temporary to use as our predicate stack 
counter, if we - * don't already have one. */ - if (!compiler->PredicateMask) { - unsigned int writemasks[RC_REGISTER_MAX_INDEX]; - struct rc_instruction * inst; - unsigned int i; - memset(writemasks, 0, sizeof(writemasks)); - for(inst = compiler->Base.Program.Instructions.Next; - inst != &compiler->Base.Program.Instructions; - inst = inst->Next) { - rc_for_all_writes_mask(inst, mark_write, writemasks); - } - for(i = 0; i < compiler->Base.max_temp_regs; i++) { - unsigned int mask = ~writemasks[i] & RC_MASK_XYZW; - /* Only the W component can be used fo the predicate - * stack counter. */ - if (mask & RC_MASK_W) { - compiler->PredicateMask = RC_MASK_W; - compiler->PredicateIndex = i; - break; - } - } - if (i == compiler->Base.max_temp_regs) { - rc_error(&compiler->Base, "No free temporary to use for" - " predicate stack counter.\n"); - return; - } - } - predicate_opcode = - branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ; - - rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0)); - if (branch_depth == 0) { - is_math = 1; - predicate_opcode = ME_PRED_SET_NEQ; - inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]); - inst[2] = 0; - } else { - predicate_opcode = VE_PRED_SET_NEQ_PUSH; - inst[1] = t_pred_src(compiler); - inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]); - } - - inst[0] = t_pred_dst(compiler, predicate_opcode, is_math); - inst[3] = 0; - -} - -static void ei_else(struct r300_vertex_program_compiler * compiler, - unsigned int * inst) -{ - if (!compiler->Base.is_r500) { - rc_error(&compiler->Base,"Opcode ELSE not supported\n"); - return; - } - inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1); - inst[1] = t_pred_src(compiler); - inst[2] = 0; - inst[3] = 0; -} - -static void ei_endif(struct r300_vertex_program_compiler *compiler, - unsigned int * inst) -{ - if (!compiler->Base.is_r500) { - rc_error(&compiler->Base,"Opcode ENDIF not supported\n"); - return; - } - inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1); 
- inst[1] = t_pred_src(compiler); - inst[2] = 0; - inst[3] = 0; -} - -static void translate_vertex_program(struct radeon_compiler *c, void *user) -{ - struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; - struct rc_instruction *rci; - - struct loop * loops = NULL; - int current_loop_depth = 0; - int loops_reserved = 0; - - unsigned int branch_depth = 0; - - compiler->code->pos_end = 0; /* Not supported yet */ - compiler->code->length = 0; - compiler->code->num_temporaries = 0; - - compiler->SetHwInputOutput(compiler); - - for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { - struct rc_sub_instruction *vpi = &rci->U.I; - unsigned int *inst = compiler->code->body.d + compiler->code->length; - const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode); - - /* Skip instructions writing to non-existing destination */ - if (!valid_dst(compiler->code, &vpi->DstReg)) - continue; - - if (info->HasDstReg) { - /* Neither is Saturate. 
*/ - if (vpi->SaturateMode != RC_SATURATE_NONE) { - rc_error(&compiler->Base, "Vertex program does not support the Saturate " - "modifier (yet).\n"); - } - } - - if (compiler->code->length >= c->max_alu_insts * 4) { - rc_error(&compiler->Base, "Vertex program has too many instructions\n"); - return; - } - - assert(compiler->Base.is_r500 || - (vpi->Opcode != RC_OPCODE_SEQ && - vpi->Opcode != RC_OPCODE_SNE)); - - switch (vpi->Opcode) { - case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; - case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; - case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; - case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; - case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; - case RC_OPCODE_ELSE: ei_else(compiler, inst); break; - case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break; - case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; - case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; - case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; - case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break; - case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; - case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; - case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; - case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; - case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; - case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; - case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; - case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; - case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; - case 
RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; - case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; - case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; - case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; - case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; - case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; - case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; - case RC_OPCODE_BGNLOOP: - { - struct loop * l; - - if ((!compiler->Base.is_r500 - && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) - || loops_reserved >= R500_VS_MAX_FC_DEPTH) { - rc_error(&compiler->Base, - "Loops are nested too deep."); - return; - } - memory_pool_array_reserve(&compiler->Base.Pool, - struct loop, loops, current_loop_depth, - loops_reserved, 1); - l = &loops[current_loop_depth++]; - memset(l , 0, sizeof(struct loop)); - l->BgnLoop = (compiler->code->length / 4); - continue; - } - case RC_OPCODE_ENDLOOP: - { - struct loop * l; - unsigned int act_addr; - unsigned int last_addr; - unsigned int ret_addr; - - assert(loops); - l = &loops[current_loop_depth - 1]; - act_addr = l->BgnLoop - 1; - last_addr = (compiler->code->length / 4) - 1; - ret_addr = l->BgnLoop; - - if (loops_reserved >= R300_VS_MAX_FC_OPS) { - rc_error(&compiler->Base, - "Too many flow control instructions."); - return; - } - if (compiler->Base.is_r500) { - compiler->code->fc_op_addrs.r500 - [compiler->code->num_fc_ops].lw = - R500_PVS_FC_ACT_ADRS(act_addr) - | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) - ; - compiler->code->fc_op_addrs.r500 - [compiler->code->num_fc_ops].uw = - R500_PVS_FC_LAST_INST(last_addr) - | R500_PVS_FC_RTN_INST(ret_addr) - ; - } else { - compiler->code->fc_op_addrs.r300 - [compiler->code->num_fc_ops] = - R300_PVS_FC_ACT_ADRS(act_addr) - | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) - | 
R300_PVS_FC_LAST_INST(last_addr) - | R300_PVS_FC_RTN_INST(ret_addr) - ; - } - compiler->code->fc_loop_index[compiler->code->num_fc_ops] = - R300_PVS_FC_LOOP_INIT_VAL(0x0) - | R300_PVS_FC_LOOP_STEP_VAL(0x1) - ; - compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( - compiler->code->num_fc_ops); - compiler->code->num_fc_ops++; - current_loop_depth--; - continue; - } - - default: - rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name); - return; - } - - /* Non-flow control instructions that are inside an if statement - * need to pay attention to the predicate bit. */ - if (branch_depth - && vpi->Opcode != RC_OPCODE_IF - && vpi->Opcode != RC_OPCODE_ELSE - && vpi->Opcode != RC_OPCODE_ENDIF) { - - inst[0] |= (PVS_DST_PRED_ENABLE_MASK - << PVS_DST_PRED_ENABLE_SHIFT); - inst[0] |= (PVS_DST_PRED_SENSE_MASK - << PVS_DST_PRED_SENSE_SHIFT); - } - - /* Update the number of temporaries. */ - if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY && - vpi->DstReg.Index >= compiler->code->num_temporaries) - compiler->code->num_temporaries = vpi->DstReg.Index + 1; - - for (unsigned i = 0; i < info->NumSrcRegs; i++) - if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY && - vpi->SrcReg[i].Index >= compiler->code->num_temporaries) - compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1; - - if (compiler->PredicateMask) - if (compiler->PredicateIndex >= compiler->code->num_temporaries) - compiler->code->num_temporaries = compiler->PredicateIndex + 1; - - if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) { - rc_error(&compiler->Base, "Too many temporaries.\n"); - return; - } - - compiler->code->length += 4; - - if (compiler->Base.Error) - return; - } -} - -struct temporary_allocation { - unsigned int Allocated:1; - unsigned int HwTemp:15; - struct rc_instruction * LastRead; -}; - -static void allocate_temporary_registers(struct radeon_compiler *c, void *user) -{ - struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; - 
struct rc_instruction *inst; - struct rc_instruction *end_loop = NULL; - unsigned int num_orig_temps = 0; - char hwtemps[RC_REGISTER_MAX_INDEX]; - struct temporary_allocation * ta; - unsigned int i, j; - - memset(hwtemps, 0, sizeof(hwtemps)); - - rc_recompute_ips(c); - - /* Pass 1: Count original temporaries. */ - for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - if (inst->U.I.SrcReg[i].Index >= num_orig_temps) - num_orig_temps = inst->U.I.SrcReg[i].Index + 1; - } - } - - if (opcode->HasDstReg) { - if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { - if (inst->U.I.DstReg.Index >= num_orig_temps) - num_orig_temps = inst->U.I.DstReg.Index + 1; - } - } - } - - ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, - sizeof(struct temporary_allocation) * num_orig_temps); - memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); - - /* Pass 2: Determine original temporary lifetimes */ - for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - /* Instructions inside of loops need to use the ENDLOOP - * instruction as their LastRead. 
*/ - if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { - int endloops = 1; - struct rc_instruction * ptr; - for(ptr = inst->Next; - ptr != &compiler->Base.Program.Instructions; - ptr = ptr->Next){ - if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { - endloops++; - } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { - endloops--; - if (endloops <= 0) { - end_loop = ptr; - break; - } - } - } - } - - if (inst == end_loop) { - end_loop = NULL; - continue; - } - - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst; - } - } - } - - /* Pass 3: Register allocation */ - for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - unsigned int orig = inst->U.I.SrcReg[i].Index; - inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; - - if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = 0; - } - } - - if (opcode->HasDstReg) { - if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { - unsigned int orig = inst->U.I.DstReg.Index; - - if (!ta[orig].Allocated) { - for(j = 0; j < c->max_temp_regs; ++j) { - if (!hwtemps[j]) - break; - } - ta[orig].Allocated = 1; - ta[orig].HwTemp = j; - hwtemps[ta[orig].HwTemp] = 1; - } - - inst->U.I.DstReg.Index = ta[orig].HwTemp; - } - } - } -} - -/** - * R3xx-R4xx vertex engine does not support the Absolute source operand modifier - * and the Saturate opcode modifier. Only Absolute is currently transformed. - */ -static int transform_nonnative_modifiers( - struct radeon_compiler *c, - struct rc_instruction *inst, - void* unused) -{ - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned i; - - /* Transform ABS(a) to MAX(a, -a). 
*/ - for (i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].Abs) { - struct rc_instruction *new_inst; - unsigned temp; - - inst->U.I.SrcReg[i].Abs = 0; - - temp = rc_find_free_temporary(c); - - new_inst = rc_insert_new_instruction(c, inst->Prev); - new_inst->U.I.Opcode = RC_OPCODE_MAX; - new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - new_inst->U.I.DstReg.Index = temp; - new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i]; - new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i]; - new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; - - memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i])); - inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[i].Index = temp; - inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW; - } - } - return 1; -} - -/** - * Vertex engine cannot read two inputs or two constants at the same time. - * Introduce intermediate MOVs to temporary registers to account for this. - */ -static int transform_source_conflicts( - struct radeon_compiler *c, - struct rc_instruction* inst, - void* unused) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if (opcode->NumSrcRegs == 3) { - if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) - || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { - int tmpreg = rc_find_free_temporary(c); - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = tmpreg; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; - - reset_srcreg(&inst->U.I.SrcReg[2]); - inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[2].Index = tmpreg; - } - } - - if (opcode->NumSrcRegs >= 2) { - if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { - int tmpreg = rc_find_free_temporary(c); - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - 
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = tmpreg; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; - - reset_srcreg(&inst->U.I.SrcReg[1]); - inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[1].Index = tmpreg; - } - } - - return 1; -} - -static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user) -{ - struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c; - int i; - - for(i = 0; i < 32; ++i) { - if ((compiler->RequiredOutputs & (1 << i)) && - !(compiler->Base.Program.OutputsWritten & (1 << i))) { - struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); - inst->U.I.Opcode = RC_OPCODE_MOV; - - inst->U.I.DstReg.File = RC_FILE_OUTPUT; - inst->U.I.DstReg.Index = i; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - - inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; - inst->U.I.SrcReg[0].Index = 0; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - - compiler->Base.Program.OutputsWritten |= 1 << i; - } - } -} - -static void dataflow_outputs_mark_used(void * userdata, void * data, - void (*callback)(void *, unsigned int, unsigned int)) -{ - struct r300_vertex_program_compiler * c = userdata; - int i; - - for(i = 0; i < 32; ++i) { - if (c->RequiredOutputs & (1 << i)) - callback(data, i, RC_MASK_XYZW); - } -} - -static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) -{ - (void) opcode; - (void) reg; - - return 1; -} - -static void transform_negative_addressing(struct r300_vertex_program_compiler *c, - struct rc_instruction *arl, - struct rc_instruction *end, - int min_offset) -{ - struct rc_instruction *inst, *add; - unsigned const_swizzle; - - /* Transform ARL */ - add = rc_insert_new_instruction(&c->Base, arl->Prev); - add->U.I.Opcode = RC_OPCODE_ADD; - add->U.I.DstReg.File = RC_FILE_TEMPORARY; - add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); - add->U.I.DstReg.WriteMask = RC_MASK_X; - 
add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; - add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, - min_offset, &const_swizzle); - add->U.I.SrcReg[1].Swizzle = const_swizzle; - - arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; - arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; - - /* Rewrite offsets up to and excluding inst. */ - for (inst = arl->Next; inst != end; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - - for (unsigned i = 0; i < opcode->NumSrcRegs; i++) - if (inst->U.I.SrcReg[i].RelAddr) - inst->U.I.SrcReg[i].Index -= min_offset; - } -} - -static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user) -{ - struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler; - struct rc_instruction *inst, *lastARL = NULL; - int min_offset = 0; - - for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if (inst->U.I.Opcode == RC_OPCODE_ARL) { - if (lastARL != NULL && min_offset < 0) - transform_negative_addressing(c, lastARL, inst, min_offset); - - lastARL = inst; - min_offset = 0; - continue; - } - - for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].RelAddr && - inst->U.I.SrcReg[i].Index < 0) { - /* ARL must precede any indirect addressing. 
*/ - if (lastARL == NULL) { - rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL."); - return; - } - - if (inst->U.I.SrcReg[i].Index < min_offset) - min_offset = inst->U.I.SrcReg[i].Index; - } - } - } - - if (lastARL != NULL && min_offset < 0) - transform_negative_addressing(c, lastARL, inst, min_offset); -} - -static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { - .IsNative = &swizzle_is_native, - .Split = 0 /* should never be called */ -}; - -void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) -{ - int is_r500 = c->Base.is_r500; - int opt = !c->Base.disable_optimizations; - - /* Lists of instruction transformations. */ - struct radeon_program_transformation alu_rewrite_r500[] = { - { &r300_transform_vertex_alu, 0 }, - { &r300_transform_trig_scale_vertex, 0 }, - { 0, 0 } - }; - - struct radeon_program_transformation alu_rewrite_r300[] = { - { &r300_transform_vertex_alu, 0 }, - { &r300_transform_trig_simple, 0 }, - { 0, 0 } - }; - - /* Note: These passes have to be done seperately from ALU rewrite, - * otherwise non-native ALU instructions with source conflits - * or non-native modifiers will not be treated properly. - */ - struct radeon_program_transformation emulate_modifiers[] = { - { &transform_nonnative_modifiers, 0 }, - { 0, 0 } - }; - - struct radeon_program_transformation resolve_src_conflicts[] = { - { &transform_source_conflicts, 0 }, - { 0, 0 } - }; - - /* List of compiler passes. 
*/ - struct radeon_compiler_pass vs_list[] = { - /* NAME DUMP PREDICATE FUNCTION PARAM */ - {"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL}, - {"transform loops", 1, 1, rc_transform_loops, NULL}, - {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, - {"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL}, - {"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500}, - {"native rewrite", 1, !is_r500, rc_local_transform, alu_rewrite_r300}, - {"emulate modifiers", 1, !is_r500, rc_local_transform, emulate_modifiers}, - {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_used}, - {"dataflow optimize", 1, opt, rc_optimize, NULL}, - /* This pass must be done after optimizations. */ - {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, - {"register allocation", 1, opt, allocate_temporary_registers, NULL}, - {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, - {"final code validation", 0, 1, rc_validate_final_shader, NULL}, - {"machine code generation", 0, 1, translate_vertex_program, NULL}, - {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, - {NULL, 0, 0, NULL, NULL} - }; - - c->Base.type = RC_VERTEX_PROGRAM; - c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; - - rc_run_compiler(&c->Base, vs_list); - - c->code->InputsRead = c->Base.Program.InputsRead; - c->code->OutputsWritten = c->Base.Program.OutputsWritten; - rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); -} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c deleted file mode 100644 index 2bc0a87..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and 
associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "radeon_compiler.h" -#include "radeon_code.h" -#include "../r300_reg.h" - -#include - -static char* r300_vs_ve_ops[] = { - /* R300 vector ops */ - " VE_NO_OP", - " VE_DOT_PRODUCT", - " VE_MULTIPLY", - " VE_ADD", - " VE_MULTIPLY_ADD", - " VE_DISTANCE_FACTOR", - " VE_FRACTION", - " VE_MAXIMUM", - " VE_MINIMUM", - "VE_SET_GREATER_THAN_EQUAL", - " VE_SET_LESS_THAN", - " VE_MULTIPLYX2_ADD", - " VE_MULTIPLY_CLAMP", - " VE_FLT2FIX_DX", - " VE_FLT2FIX_DX_RND", - /* R500 vector ops */ - " VE_PRED_SET_EQ_PUSH", - " VE_PRED_SET_GT_PUSH", - " VE_PRED_SET_GTE_PUSH", - " VE_PRED_SET_NEQ_PUSH", - " VE_COND_WRITE_EQ", - " VE_COND_WRITE_GT", - " VE_COND_WRITE_GTE", - " VE_COND_WRITE_NEQ", - " VE_COND_MUX_EQ", - " VE_COND_MUX_GT", - " VE_COND_MUX_GTE", - " VE_SET_GREATER_THAN", - " VE_SET_EQUAL", - " VE_SET_NOT_EQUAL", - " (reserved)", - " (reserved)", - " (reserved)", -}; - -static char* r300_vs_me_ops[] = { - /* R300 math ops */ - " ME_NO_OP", - " ME_EXP_BASE2_DX", - " ME_LOG_BASE2_DX", - " ME_EXP_BASEE_FF", - " ME_LIGHT_COEFF_DX", - " ME_POWER_FUNC_FF", - " ME_RECIP_DX", - " ME_RECIP_FF", - " ME_RECIP_SQRT_DX", - " ME_RECIP_SQRT_FF", - " ME_MULTIPLY", - " ME_EXP_BASE2_FULL_DX", - " ME_LOG_BASE2_FULL_DX", - " ME_POWER_FUNC_FF_CLAMP_B", - "ME_POWER_FUNC_FF_CLAMP_B1", - "ME_POWER_FUNC_FF_CLAMP_01", - " ME_SIN", - " ME_COS", - /* R500 math ops */ - " ME_LOG_BASE2_IEEE", - " ME_RECIP_IEEE", - " ME_RECIP_SQRT_IEEE", - " ME_PRED_SET_EQ", - " ME_PRED_SET_GT", - " ME_PRED_SET_GTE", - " ME_PRED_SET_NEQ", - " ME_PRED_SET_CLR", - " ME_PRED_SET_INV", - " ME_PRED_SET_POP", - " ME_PRED_SET_RESTORE", - " (reserved)", - " (reserved)", - " (reserved)", -}; - -/* XXX refactor to avoid clashing symbols */ -static char* r300_vs_src_debug[] = { - "t", - "i", - "c", - "a", -}; - -static char* r300_vs_dst_debug[] = { - "t", - "a0", - "o", - "ox", - "a", - "i", - "u", - "u", -}; - -static char* r300_vs_swiz_debug[] = { - "X", - "Y", - "Z", - "W", - "0", - "1", - "U", - "U", -}; - - -static 
void r300_vs_op_dump(uint32_t op) -{ - fprintf(stderr, " dst: %d%s op: ", - (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); - if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { - fprintf(stderr, "PRED %u", - (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); - } - if (op & 0x80) { - if (op & 0x1) { - fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); - } else { - fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); - } - } else if (op & 0x40) { - fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); - } else { - fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); - } -} - -static void r300_vs_src_dump(uint32_t src) -{ - fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", - (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], - src & (1 << 25) ? "-" : " ", - r300_vs_swiz_debug[(src >> 13) & 0x7], - src & (1 << 26) ? "-" : " ", - r300_vs_swiz_debug[(src >> 16) & 0x7], - src & (1 << 27) ? "-" : " ", - r300_vs_swiz_debug[(src >> 19) & 0x7], - src & (1 << 28) ? "-" : " ", - r300_vs_swiz_debug[(src >> 22) & 0x7]); -} - -void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user) -{ - struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler; - struct r300_vertex_program_code * vs = c->code; - unsigned instrcount = vs->length / 4; - unsigned i; - - fprintf(stderr, "Final vertex program code:\n"); - - for(i = 0; i < instrcount; i++) { - unsigned offset = i*4; - unsigned src; - - fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); - r300_vs_op_dump(vs->body.d[offset]); - - for(src = 0; src < 3; ++src) { - fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); - r300_vs_src_dump(vs->body.d[offset+1+src]); - } - } - - fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); - for(i = 0; i < vs->num_fc_ops; i++) { - switch((vs->fc_ops >> (i * 2)) & 0x3 ) { - case 0: fprintf(stderr, "NOP"); break; - case 1: fprintf(stderr, "JUMP"); break; - case 2: fprintf(stderr, "LOOP"); break; - case 3: fprintf(stderr, "JSR"); break; - } - if 
(c->Base.is_r500) { - fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n", - vs->fc_op_addrs.r500[i].uw, - vs->fc_op_addrs.r500[i].lw); - } else { - fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); - } - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c deleted file mode 100644 index cf99f5e..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "r500_fragprog.h" - -#include - -#include "radeon_compiler_util.h" -#include "radeon_list.h" -#include "radeon_variable.h" -#include "../r300_reg.h" - -/** - * Rewrite IF instructions to use the ALU result special register. 
- */ -int r500_transform_IF( - struct radeon_compiler * c, - struct rc_instruction * inst_if, - void *data) -{ - struct rc_variable * writer; - struct rc_list * writer_list, * list_ptr; - struct rc_list * var_list = rc_get_variables(c); - unsigned int generic_if = 0; - unsigned int alu_chan; - - if (inst_if->U.I.Opcode != RC_OPCODE_IF) { - return 0; - } - - writer_list = rc_variable_list_get_writers( - var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); - if (!writer_list) { - generic_if = 1; - } else { - - /* Make sure it is safe for the writers to write to - * ALU Result */ - for (list_ptr = writer_list; list_ptr; - list_ptr = list_ptr->Next) { - struct rc_instruction * inst; - writer = list_ptr->Item; - /* We are going to modify the destination register - * of writer, so if it has a reader other than - * inst_if (aka ReaderCount > 1) we must fall back to - * our generic IF. - * If the writer has a lower IP than inst_if, this - * means that inst_if is above the writer in a loop. - * I'm not sure why this would ever happen, but - * if it does we want to make sure we fall back - * to our generic IF. */ - if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { - generic_if = 1; - break; - } - - /* The ALU Result is not preserved across IF - * instructions, so if there is another IF - * instruction between writer and inst_if, then - * we need to fall back to generic IF. 
*/ - for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - if (info->IsFlowControl) { - generic_if = 1; - break; - } - } - if (generic_if) { - break; - } - } - } - - if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { - alu_chan = RC_ALURESULT_X; - } else { - alu_chan = RC_ALURESULT_W; - } - if (generic_if) { - struct rc_instruction * inst_mov = - rc_insert_new_instruction(c, inst_if->Prev); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.WriteMask = 0; - inst_mov->U.I.DstReg.File = RC_FILE_NONE; - inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; - inst_mov->U.I.WriteALUResult = alu_chan; - inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; - if (alu_chan == RC_ALURESULT_X) { - inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( - inst_mov->U.I.SrcReg[0].Swizzle, - RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); - } else { - inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( - inst_mov->U.I.SrcReg[0].Swizzle, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); - } - } else { - rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER; - unsigned int reverse_srcs = 0; - unsigned int preserve_opcode = 0; - for (list_ptr = writer_list; list_ptr; - list_ptr = list_ptr->Next) { - writer = list_ptr->Item; - switch(writer->Inst->U.I.Opcode) { - case RC_OPCODE_SEQ: - compare_func = RC_COMPARE_FUNC_EQUAL; - break; - case RC_OPCODE_SNE: - compare_func = RC_COMPARE_FUNC_NOTEQUAL; - break; - case RC_OPCODE_SLE: - reverse_srcs = 1; - /* Fall through */ - case RC_OPCODE_SGE: - compare_func = RC_COMPARE_FUNC_GEQUAL; - break; - case RC_OPCODE_SGT: - reverse_srcs = 1; - /* Fall through */ - case RC_OPCODE_SLT: - compare_func = RC_COMPARE_FUNC_LESS; - break; - default: - compare_func = RC_COMPARE_FUNC_NOTEQUAL; - preserve_opcode = 1; - break; - } - if (!preserve_opcode) { - writer->Inst->U.I.Opcode = 
RC_OPCODE_SUB; - } - writer->Inst->U.I.DstReg.WriteMask = 0; - writer->Inst->U.I.DstReg.File = RC_FILE_NONE; - writer->Inst->U.I.WriteALUResult = alu_chan; - writer->Inst->U.I.ALUResultCompare = compare_func; - if (reverse_srcs) { - struct rc_src_register temp_src; - temp_src = writer->Inst->U.I.SrcReg[0]; - writer->Inst->U.I.SrcReg[0] = - writer->Inst->U.I.SrcReg[1]; - writer->Inst->U.I.SrcReg[1] = temp_src; - } - } - } - - inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; - inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; - inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE( - RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); - inst_if->U.I.SrcReg[0].Negate = 0; - - return 1; -} - -static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) -{ - unsigned int relevant; - int i; - - if (opcode == RC_OPCODE_TEX || - opcode == RC_OPCODE_TXB || - opcode == RC_OPCODE_TXP || - opcode == RC_OPCODE_TXD || - opcode == RC_OPCODE_TXL || - opcode == RC_OPCODE_KIL) { - if (reg.Abs) - return 0; - - if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) - return 0; - - for(i = 0; i < 4; ++i) { - unsigned int swz = GET_SWZ(reg.Swizzle, i); - if (swz == RC_SWIZZLE_UNUSED) { - reg.Negate &= ~(1 << i); - continue; - } - if (swz >= 4) - return 0; - } - - if (reg.Negate) - return 0; - - return 1; - } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { - /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; - * if it doesn't fit perfectly into a .xyzw case... 
*/ - if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) - return 1; - - return 0; - } else { - /* ALU instructions support almost everything */ - relevant = 0; - for(i = 0; i < 3; ++i) { - unsigned int swz = GET_SWZ(reg.Swizzle, i); - if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) - relevant |= 1 << i; - } - if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return 0; - - return 1; - } -} - -/** - * Split source register access. - * - * The only thing we *cannot* do in an ALU instruction is per-component - * negation. - */ -static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, - struct rc_swizzle_split * split) -{ - unsigned int negatebase[2] = { 0, 0 }; - int i; - - for(i = 0; i < 4; ++i) { - unsigned int swz = GET_SWZ(src.Swizzle, i); - if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) - continue; - negatebase[GET_BIT(src.Negate, i)] |= 1 << i; - } - - split->NumPhases = 0; - - for(i = 0; i <= 1; ++i) { - if (!negatebase[i]) - continue; - - split->Phase[split->NumPhases++] = negatebase[i]; - } -} - -struct rc_swizzle_caps r500_swizzle_caps = { - .IsNative = r500_swizzle_is_native, - .Split = r500_swizzle_split -}; - -static char *toswiz(int swiz_val) { - switch(swiz_val) { - case 0: return "R"; - case 1: return "G"; - case 2: return "B"; - case 3: return "A"; - case 4: return "0"; - case 5: return "H"; - case 6: return "1"; - case 7: return "U"; - } - return NULL; -} - -static char *toop(int op_val) -{ - char *str = NULL; - switch (op_val) { - case 0: str = "MAD"; break; - case 1: str = "DP3"; break; - case 2: str = "DP4"; break; - case 3: str = "D2A"; break; - case 4: str = "MIN"; break; - case 5: str = "MAX"; break; - case 6: str = "Reserved"; break; - case 7: str = "CND"; break; - case 8: str = "CMP"; break; - case 9: str = "FRC"; break; - case 10: str = "SOP"; break; - case 11: str = "MDH"; break; - case 12: str = "MDV"; break; - } - return str; -} - -static char *to_alpha_op(int op_val) 
-{ - char *str = NULL; - switch (op_val) { - case 0: str = "MAD"; break; - case 1: str = "DP"; break; - case 2: str = "MIN"; break; - case 3: str = "MAX"; break; - case 4: str = "Reserved"; break; - case 5: str = "CND"; break; - case 6: str = "CMP"; break; - case 7: str = "FRC"; break; - case 8: str = "EX2"; break; - case 9: str = "LN2"; break; - case 10: str = "RCP"; break; - case 11: str = "RSQ"; break; - case 12: str = "SIN"; break; - case 13: str = "COS"; break; - case 14: str = "MDH"; break; - case 15: str = "MDV"; break; - } - return str; -} - -static char *to_mask(int val) -{ - char *str = NULL; - switch(val) { - case 0: str = "NONE"; break; - case 1: str = "R"; break; - case 2: str = "G"; break; - case 3: str = "RG"; break; - case 4: str = "B"; break; - case 5: str = "RB"; break; - case 6: str = "GB"; break; - case 7: str = "RGB"; break; - case 8: str = "A"; break; - case 9: str = "AR"; break; - case 10: str = "AG"; break; - case 11: str = "ARG"; break; - case 12: str = "AB"; break; - case 13: str = "ARB"; break; - case 14: str = "AGB"; break; - case 15: str = "ARGB"; break; - } - return str; -} - -static char *to_texop(int val) -{ - switch(val) { - case 0: return "NOP"; - case 1: return "LD"; - case 2: return "TEXKILL"; - case 3: return "PROJ"; - case 4: return "LODBIAS"; - case 5: return "LOD"; - case 6: return "DXDY"; - } - return NULL; -} - -void r500FragmentProgramDump(struct radeon_compiler *c, void *user) -{ - struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; - struct r500_fragment_program_code *code = &compiler->code->code.r500; - int n, i; - uint32_t inst; - uint32_t inst0; - char *str = NULL; - fprintf(stderr, "R500 Fragment Program:\n--------\n"); - - for (n = 0; n < code->inst_end+1; n++) { - inst0 = inst = code->inst[n].inst0; - fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); - switch(inst & 0x3) { - case R500_INST_TYPE_ALU: str = "ALU"; break; - case R500_INST_TYPE_OUT: str = "OUT"; break; - case 
R500_INST_TYPE_FC: str = "FC"; break; - case R500_INST_TYPE_TEX: str = "TEX"; break; - }; - fprintf(stderr,"%s %s %s %s %s ", str, - inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", - inst & R500_INST_LAST ? "LAST" : "", - inst & R500_INST_NOP ? "NOP" : "", - inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); - fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), - to_mask((inst >> 15) & 0xf)); - - switch(inst0 & 0x3) { - case R500_INST_TYPE_ALU: - case R500_INST_TYPE_OUT: - fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); - inst = code->inst[n].inst1; - - fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1<<8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', - (inst >> 30)); - - fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); - inst = code->inst[n].inst2; - fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1<<8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', - (inst >> 30)); - fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); - inst = code->inst[n].inst3; - fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", - (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), - (inst >> 11) & 0x3, - (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), - (inst >> 24) & 0x3, (inst >> 29) & 0x3); - - - fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); - inst = code->inst[n].inst4; - fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), - (inst >> 4) & 0x7f, inst & (1<<11) ? 
"(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, - (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, - (inst >> 29) & 0x3, - (inst >> 31) & 0x1); - - fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); - inst = code->inst[n].inst5; - fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), - (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), - (inst >> 23) & 0x3, - (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); - break; - case R500_INST_TYPE_FC: - fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2); - inst = code->inst[n].inst2; - /* JUMP_FUNC JUMP_ANY*/ - fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, - (inst & R500_FC_JUMP_ANY) >> 5); - - /* OP */ - switch(inst & 0x7){ - case R500_FC_OP_JUMP: - fprintf(stderr, "JUMP"); - break; - case R500_FC_OP_LOOP: - fprintf(stderr, "LOOP"); - break; - case R500_FC_OP_ENDLOOP: - fprintf(stderr, "ENDLOOP"); - break; - case R500_FC_OP_REP: - fprintf(stderr, "REP"); - break; - case R500_FC_OP_ENDREP: - fprintf(stderr, "ENDREP"); - break; - case R500_FC_OP_BREAKLOOP: - fprintf(stderr, "BREAKLOOP"); - break; - case R500_FC_OP_BREAKREP: - fprintf(stderr, "BREAKREP"); - break; - case R500_FC_OP_CONTINUE: - fprintf(stderr, "CONTINUE"); - break; - } - fprintf(stderr," "); - /* A_OP */ - switch(inst & (0x3 << 6)){ - case R500_FC_A_OP_NONE: - fprintf(stderr, "NONE"); - break; - case R500_FC_A_OP_POP: - fprintf(stderr, "POP"); - break; - case R500_FC_A_OP_PUSH: - fprintf(stderr, "PUSH"); - break; - } - /* B_OP0 B_OP1 */ - for(i=0; i<2; i++){ - fprintf(stderr, " "); - switch(inst & (0x3 << (24 + (i * 2)))){ - /* R500_FC_B_OP0_NONE - * R500_FC_B_OP1_NONE */ - case 0: - fprintf(stderr, "NONE"); - break; - case R500_FC_B_OP0_DECR: - case R500_FC_B_OP1_DECR: - fprintf(stderr, "DECR"); - break; - case 
R500_FC_B_OP0_INCR: - case R500_FC_B_OP1_INCR: - fprintf(stderr, "INCR"); - break; - } - } - /*POP_CNT B_ELSE */ - fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4); - inst = code->inst[n].inst3; - /* JUMP_ADDR */ - fprintf(stderr, " %d", inst >> 16); - - if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){ - fprintf(stderr, " IGN_UNC"); - } - inst = code->inst[n].inst3; - fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst); - fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", - inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); - break; - case R500_INST_TYPE_TEX: - inst = code->inst[n].inst1; - fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, - to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", - (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); - inst = code->inst[n].inst2; - fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, - inst & 127, inst & (1<<7) ? "(rel)" : "", - toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), - toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), - (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", - toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), - toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); - - fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); - break; - } - fprintf(stderr,"\n"); - } - -} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h deleted file mode 100644 index 6aa448c..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/* - * Authors: - * Ben Skeggs - * Jerome Glisse - */ -#ifndef __R500_FRAGPROG_H_ -#define __R500_FRAGPROG_H_ - -#include "radeon_compiler.h" -#include "radeon_swizzle.h" - -extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user); - -extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user); - -extern struct rc_swizzle_caps r500_swizzle_caps; - -extern int r500_transform_IF( - struct radeon_compiler * c, - struct rc_instruction * inst_if, - void* data); - -#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c deleted file mode 100644 index c30cd75..0000000 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ /dev/null @@ -1,678 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. 
- * - * Copyright 2008 Corbin Simpson - * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * \file - * - * \author Ben Skeggs - * - * \author Jerome Glisse - * - * \author Corbin Simpson - * - */ - -#include "r500_fragprog.h" - -#include "../r300_reg.h" - -#include "radeon_program_pair.h" - -#define PROG_CODE \ - struct r500_fragment_program_code *code = &c->code->code.r500 - -#define error(fmt, args...) 
do { \ - rc_error(&c->Base, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - } while(0) - - -struct branch_info { - int If; - int Else; - int Endif; -}; - -struct r500_loop_info { - int BgnLoop; - - int BranchDepth; - int * Brks; - int BrkCount; - int BrkReserved; - - int * Conts; - int ContCount; - int ContReserved; -}; - -struct emit_state { - struct radeon_compiler * C; - struct r500_fragment_program_code * Code; - - struct branch_info * Branches; - unsigned int CurrentBranchDepth; - unsigned int BranchesReserved; - - struct r500_loop_info * Loops; - unsigned int CurrentLoopDepth; - unsigned int LoopsReserved; - - unsigned int MaxBranchDepth; - -}; - -static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) -{ - switch(opcode) { - case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; - case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; - case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; - case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; - case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; - case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; - case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; - default: - error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); - /* fall through */ - case RC_OPCODE_NOP: - /* fall through */ - case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; - case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; - case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; - case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; - } -} - -static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) -{ - switch(opcode) { - case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; - case RC_OPCODE_CND: return R500_ALPHA_OP_CND; - case RC_OPCODE_COS: return R500_ALPHA_OP_COS; - case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; - case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; - case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; - case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; - case 
RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; - case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; - case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; - default: - error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); - /* fall through */ - case RC_OPCODE_NOP: - /* fall through */ - case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; - case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; - case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; - case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; - case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; - case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; - } -} - -static unsigned int fix_hw_swizzle(unsigned int swz) -{ - switch (swz) { - case RC_SWIZZLE_ZERO: - case RC_SWIZZLE_UNUSED: - swz = 4; - break; - case RC_SWIZZLE_HALF: - swz = 5; - break; - case RC_SWIZZLE_ONE: - swz = 6; - break; - } - - return swz; -} - -static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) -{ - unsigned int t = inst->RGB.Arg[arg].Source; - int comp; - t |= inst->RGB.Arg[arg].Negate << 11; - t |= inst->RGB.Arg[arg].Abs << 12; - - for(comp = 0; comp < 3; ++comp) - t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); - - return t; -} - -static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) -{ - unsigned int t = inst->Alpha.Arg[i].Source; - t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; - t |= inst->Alpha.Arg[i].Negate << 5; - t |= inst->Alpha.Arg[i].Abs << 6; - return t; -} - -static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) -{ - switch(func) { - case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; - case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; - case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; - case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; - default: - rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); - return 0; - } -} - 
-static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) -{ - if (index > code->max_temp_idx) - code->max_temp_idx = index; -} - -static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) -{ - /* From docs: - * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. - * MSB = 1 << 7 */ - if (!src.Used) - return 1 << 7; - - if (src.File == RC_FILE_CONSTANT) { - return src.Index | R500_RGB_ADDR0_CONST; - } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { - use_temporary(code, src.Index); - return src.Index; - } - - return 0; -} - -/** - * NOP the specified instruction if it is not a texture lookup. - */ -static void alu_nop(struct r300_fragment_program_compiler *c, int ip) -{ - PROG_CODE; - - if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { - code->inst[ip].inst0 |= R500_INST_NOP; - } -} - -/** - * Emit a paired ALU instruction. - */ -static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) -{ - int ip; - PROG_CODE; - - if (code->inst_end >= c->Base.max_alu_insts-1) { - error("emit_alu: Too many instructions"); - return; - } - - ip = ++code->inst_end; - - /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. 
*/ - if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || - inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { - if (ip > 0) { - alu_nop(c, ip - 1); - } - } - - code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); - code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); - - if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { - code->inst[ip].inst0 = R500_INST_TYPE_OUT; - if (inst->WriteALUResult) { - error("Cannot write output and ALU result at the same time"); - return; - } - } else { - code->inst[ip].inst0 = R500_INST_TYPE_ALU; - } - code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; - - code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); - code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; - code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); - if (inst->Nop) { - code->inst[ip].inst0 |= R500_INST_NOP; - } - if (inst->Alpha.DepthWriteMask) { - code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->code->writes_depth = 1; - } - - code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); - code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); - use_temporary(code, inst->Alpha.DestIndex); - use_temporary(code, inst->RGB.DestIndex); - - if (inst->RGB.Saturate) - code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; - if (inst->Alpha.Saturate) - code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; - - /* Set the presubtract operation. 
*/ - switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { - case RC_PRESUB_BIAS: - code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; - break; - case RC_PRESUB_SUB: - code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; - break; - case RC_PRESUB_ADD: - code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; - break; - case RC_PRESUB_INV: - code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; - break; - default: - break; - } - switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { - case RC_PRESUB_BIAS: - code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; - break; - case RC_PRESUB_SUB: - code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; - break; - case RC_PRESUB_ADD: - code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; - break; - case RC_PRESUB_INV: - code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; - break; - default: - break; - } - - code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); - code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); - code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); - - code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); - code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); - code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); - - code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; - code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; - code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; - - code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; - code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; - code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - - code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); - code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); - - if 
(inst->WriteALUResult) { - code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; - - if (inst->WriteALUResult == RC_ALURESULT_X) - code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; - else - code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; - - code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); - } -} - -static unsigned int translate_strq_swizzle(unsigned int swizzle) -{ - unsigned int swiz = 0; - int i; - for (i = 0; i < 4; i++) - swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; - return swiz; -} - -/** - * Emit a single TEX instruction - */ -static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) -{ - int ip; - PROG_CODE; - - if (code->inst_end >= c->Base.max_alu_insts-1) { - error("emit_tex: Too many instructions"); - return 0; - } - - ip = ++code->inst_end; - - code->inst[ip].inst0 = R500_INST_TYPE_TEX - | (inst->DstReg.WriteMask << 11) - | R500_INST_TEX_SEM_WAIT; - code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) - | R500_TEX_SEM_ACQUIRE; - - if (inst->TexSrcTarget == RC_TEXTURE_RECT) - code->inst[ip].inst1 |= R500_TEX_UNSCALED; - - switch (inst->Opcode) { - case RC_OPCODE_KIL: - code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; - break; - case RC_OPCODE_TEX: - code->inst[ip].inst1 |= R500_TEX_INST_LD; - break; - case RC_OPCODE_TXB: - code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; - break; - case RC_OPCODE_TXP: - code->inst[ip].inst1 |= R500_TEX_INST_PROJ; - break; - case RC_OPCODE_TXD: - code->inst[ip].inst1 |= R500_TEX_INST_DXDY; - break; - case RC_OPCODE_TXL: - code->inst[ip].inst1 |= R500_TEX_INST_LOD; - break; - default: - error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); - } - - use_temporary(code, inst->SrcReg[0].Index); - if (inst->Opcode != RC_OPCODE_KIL) - use_temporary(code, inst->DstReg.Index); - - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) - | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) - | 
R500_TEX_DST_ADDR(inst->DstReg.Index) - | (GET_SWZ(inst->TexSwizzle, 0) << 24) - | (GET_SWZ(inst->TexSwizzle, 1) << 26) - | (GET_SWZ(inst->TexSwizzle, 2) << 28) - | (GET_SWZ(inst->TexSwizzle, 3) << 30) - ; - - if (inst->Opcode == RC_OPCODE_TXD) { - use_temporary(code, inst->SrcReg[1].Index); - use_temporary(code, inst->SrcReg[2].Index); - - /* DX and DY parameters are specified in a separate register. */ - code->inst[ip].inst3 = - R500_DX_ADDR(inst->SrcReg[1].Index) | - (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | - R500_DY_ADDR(inst->SrcReg[2].Index) | - (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); - } - - return 1; -} - -static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) -{ - unsigned int newip; - - if (s->Code->inst_end >= s->C->max_alu_insts-1) { - rc_error(s->C, "emit_tex: Too many instructions"); - return; - } - - newip = ++s->Code->inst_end; - - /* Currently all loops use the same integer constant to intialize - * the loop variables. 
*/ - if(!s->Code->int_constants[0]) { - s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); - s->Code->int_constant_count = 1; - } - s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; - - switch(inst->U.I.Opcode){ - struct branch_info * branch; - struct r500_loop_info * loop; - case RC_OPCODE_BGNLOOP: - memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, - s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); - - loop = &s->Loops[s->CurrentLoopDepth++]; - memset(loop, 0, sizeof(struct r500_loop_info)); - loop->BranchDepth = s->CurrentBranchDepth; - loop->BgnLoop = newip; - - s->Code->inst[newip].inst2 = R500_FC_OP_LOOP - | R500_FC_JUMP_FUNC(0x00) - | R500_FC_IGNORE_UNCOVERED - ; - break; - case RC_OPCODE_BRK: - loop = &s->Loops[s->CurrentLoopDepth - 1]; - memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, - loop->BrkCount, loop->BrkReserved, 1); - - loop->Brks[loop->BrkCount++] = newip; - s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP - | R500_FC_JUMP_FUNC(0xff) - | R500_FC_B_OP1_DECR - | R500_FC_B_POP_CNT( - s->CurrentBranchDepth - loop->BranchDepth) - | R500_FC_IGNORE_UNCOVERED - ; - break; - - case RC_OPCODE_CONT: - loop = &s->Loops[s->CurrentLoopDepth - 1]; - memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, - loop->ContCount, loop->ContReserved, 1); - loop->Conts[loop->ContCount++] = newip; - s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE - | R500_FC_JUMP_FUNC(0xff) - | R500_FC_B_OP1_DECR - | R500_FC_B_POP_CNT( - s->CurrentBranchDepth - loop->BranchDepth) - | R500_FC_IGNORE_UNCOVERED - ; - break; - - case RC_OPCODE_ENDLOOP: - { - loop = &s->Loops[s->CurrentLoopDepth - 1]; - /* Emit ENDLOOP */ - s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP - | R500_FC_JUMP_FUNC(0xff) - | R500_FC_JUMP_ANY - | R500_FC_IGNORE_UNCOVERED - ; - /* The constant integer at index 0 is used by all loops. 
*/ - s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) - | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) - ; - - /* Set jump address and int constant for BGNLOOP */ - s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) - | R500_FC_JUMP_ADDR(newip) - ; - - /* Set jump address for the BRK instructions. */ - while(loop->BrkCount--) { - s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = - R500_FC_JUMP_ADDR(newip + 1); - } - - /* Set jump address for CONT instructions. */ - while(loop->ContCount--) { - s->Code->inst[loop->Conts[loop->ContCount]].inst3 = - R500_FC_JUMP_ADDR(newip); - } - s->CurrentLoopDepth--; - break; - } - case RC_OPCODE_IF: - if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { - rc_error(s->C, "Branch depth exceeds hardware limit"); - return; - } - memory_pool_array_reserve(&s->C->Pool, struct branch_info, - s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); - - branch = &s->Branches[s->CurrentBranchDepth++]; - branch->If = newip; - branch->Else = -1; - branch->Endif = -1; - - if (s->CurrentBranchDepth > s->MaxBranchDepth) - s->MaxBranchDepth = s->CurrentBranchDepth; - - /* actual instruction is filled in at ENDIF time */ - break; - - case RC_OPCODE_ELSE: - if (!s->CurrentBranchDepth) { - rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); - return; - } - - branch = &s->Branches[s->CurrentBranchDepth - 1]; - branch->Else = newip; - - /* actual instruction is filled in at ENDIF time */ - break; - - case RC_OPCODE_ENDIF: - if (!s->CurrentBranchDepth) { - rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); - return; - } - - branch = &s->Branches[s->CurrentBranchDepth - 1]; - branch->Endif = newip; - - s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ - | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ - | R500_FC_B_OP1_NONE /* no branch counter if stay */ - | R500_FC_B_POP_CNT(1) - ; - 
s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ - | R500_FC_B_OP0_INCR /* increment branch counter if stay */ - | R500_FC_IGNORE_UNCOVERED - ; - - if (branch->Else >= 0) { - /* increment branch counter also if jump */ - s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; - s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); - - s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_B_ELSE /* all active pixels want to jump */ - | R500_FC_B_OP0_NONE /* no counter op if stay */ - | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ - | R500_FC_B_POP_CNT(1) - ; - s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - } else { - /* don't touch branch counter on jump */ - s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; - s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - } - - - s->CurrentBranchDepth--; - break; - default: - rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); - } -} - -void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) -{ - struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; - struct emit_state s; - struct r500_fragment_program_code *code = &compiler->code->code.r500; - - memset(&s, 0, sizeof(s)); - s.C = &compiler->Base; - s.Code = code; - - memset(code, 0, sizeof(*code)); - code->max_temp_idx = 1; - code->inst_end = -1; - - for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; - inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; - inst = inst->Next) { - if (inst->Type == RC_INSTRUCTION_NORMAL) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if 
(opcode->IsFlowControl) { - emit_flowcontrol(&s, inst); - } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { - continue; - } else { - emit_tex(compiler, &inst->U.I); - } - } else { - emit_paired(compiler, &inst->U.P); - } - } - - if (code->max_temp_idx >= compiler->Base.max_temp_regs) - rc_error(&compiler->Base, "Too many hardware temporaries used"); - - if (compiler->Base.Error) - return; - - if (code->inst_end == -1 || - (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { - int ip; - - /* This may happen when dead-code elimination is disabled or - * when most of the fragment program logic is leading to a KIL */ - if (code->inst_end >= compiler->Base.max_alu_insts-1) { - rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); - return; - } - - ip = ++code->inst_end; - code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; - } - - /* Enable full flow control mode if we are using loops or have if - * statements nested at least four deep. */ - if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { - if (code->max_temp_idx < 1) - code->max_temp_idx = 1; - - code->us_fc_ctrl |= R500_FC_FULL_FC_EN; - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c deleted file mode 100644 index 6842fb8..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_code.h" - -#include -#include -#include - -#include "radeon_program.h" - -void rc_constants_init(struct rc_constant_list * c) -{ - memset(c, 0, sizeof(*c)); -} - -/** - * Copy a constants structure, assuming that the destination structure - * is not initialized. 
- */ -void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) -{ - dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); - memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); - dst->Count = src->Count; - dst->_Reserved = src->Count; -} - -void rc_constants_destroy(struct rc_constant_list * c) -{ - free(c->Constants); - memset(c, 0, sizeof(*c)); -} - -unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) -{ - unsigned index = c->Count; - - if (c->Count >= c->_Reserved) { - struct rc_constant * newlist; - - c->_Reserved = c->_Reserved * 2; - if (!c->_Reserved) - c->_Reserved = 16; - - newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); - memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); - - free(c->Constants); - c->Constants = newlist; - } - - c->Constants[index] = *constant; - c->Count++; - - return index; -} - - -/** - * Add a state vector to the constant list, while trying to avoid duplicates. - */ -unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) -{ - unsigned index; - struct rc_constant constant; - - for(index = 0; index < c->Count; ++index) { - if (c->Constants[index].Type == RC_CONSTANT_STATE) { - if (c->Constants[index].u.State[0] == state0 && - c->Constants[index].u.State[1] == state1) - return index; - } - } - - memset(&constant, 0, sizeof(constant)); - constant.Type = RC_CONSTANT_STATE; - constant.Size = 4; - constant.u.State[0] = state0; - constant.u.State[1] = state1; - - return rc_constants_add(c, &constant); -} - - -/** - * Add an immediate vector to the constant list, while trying to avoid - * duplicates. 
- */ -unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) -{ - unsigned index; - struct rc_constant constant; - - for(index = 0; index < c->Count; ++index) { - if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { - if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) - return index; - } - } - - memset(&constant, 0, sizeof(constant)); - constant.Type = RC_CONSTANT_IMMEDIATE; - constant.Size = 4; - memcpy(constant.u.Immediate, data, sizeof(float) * 4); - - return rc_constants_add(c, &constant); -} - - -/** - * Add an immediate scalar to the constant list, while trying to avoid - * duplicates. - */ -unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) -{ - unsigned index; - int free_index = -1; - struct rc_constant constant; - - for(index = 0; index < c->Count; ++index) { - if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { - unsigned comp; - for(comp = 0; comp < c->Constants[index].Size; ++comp) { - if (c->Constants[index].u.Immediate[comp] == data) { - *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); - return index; - } - } - - if (c->Constants[index].Size < 4) - free_index = index; - } - } - - if (free_index >= 0) { - unsigned comp = c->Constants[free_index].Size++; - c->Constants[free_index].u.Immediate[comp] = data; - *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); - return free_index; - } - - memset(&constant, 0, sizeof(constant)); - constant.Type = RC_CONSTANT_IMMEDIATE; - constant.Size = 1; - constant.u.Immediate[0] = data; - *swizzle = RC_SWIZZLE_XXXX; - - return rc_constants_add(c, &constant); -} - -void rc_constants_print(struct rc_constant_list * c) -{ - unsigned int i; - for(i = 0; i < c->Count; i++) { - if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) { - float * values = c->Constants[i].u.Immediate; - fprintf(stderr, "CONST[%u] = " - "{ %10.4f %10.4f %10.4f %10.4f }\n", - i, values[0],values[1], values[2], values[3]); - } - } -} diff --git 
a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h deleted file mode 100644 index 67e6acf..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef RADEON_CODE_H -#define RADEON_CODE_H - -#include - -#define R300_PFS_MAX_ALU_INST 64 -#define R300_PFS_MAX_TEX_INST 32 -#define R300_PFS_MAX_TEX_INDIRECT 4 -#define R300_PFS_NUM_TEMP_REGS 32 -#define R300_PFS_NUM_CONST_REGS 32 - -#define R400_PFS_MAX_ALU_INST 512 -#define R400_PFS_MAX_TEX_INST 512 - -#define R500_PFS_MAX_INST 512 -#define R500_PFS_NUM_TEMP_REGS 128 -#define R500_PFS_NUM_CONST_REGS 256 -#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 -#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 - - -#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) - -enum { - /** - * External constants are constants whose meaning is unknown to this - * compiler. For example, a Mesa gl_program's constants are turned - * into external constants. - */ - RC_CONSTANT_EXTERNAL = 0, - - RC_CONSTANT_IMMEDIATE, - - /** - * Constant referring to state that is known by this compiler, - * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. - */ - RC_CONSTANT_STATE -}; - -enum { - RC_STATE_SHADOW_AMBIENT = 0, - - RC_STATE_R300_WINDOW_DIMENSION, - RC_STATE_R300_TEXRECT_FACTOR, - RC_STATE_R300_TEXSCALE_FACTOR, - RC_STATE_R300_VIEWPORT_SCALE, - RC_STATE_R300_VIEWPORT_OFFSET -}; - -struct rc_constant { - unsigned Type:2; /**< RC_CONSTANT_xxx */ - unsigned Size:3; - - union { - unsigned External; - float Immediate[4]; - unsigned State[2]; - } u; -}; - -struct rc_constant_list { - struct rc_constant * Constants; - unsigned Count; - - unsigned _Reserved; -}; - -void rc_constants_init(struct rc_constant_list * c); -void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); -void rc_constants_destroy(struct rc_constant_list * c); -unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); -unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); -unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); -unsigned 
rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); -void rc_constants_print(struct rc_constant_list * c); - -/** - * Compare functions. - * - * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you - * the correct GL compare function. - */ -typedef enum { - RC_COMPARE_FUNC_NEVER = 0, - RC_COMPARE_FUNC_LESS, - RC_COMPARE_FUNC_EQUAL, - RC_COMPARE_FUNC_LEQUAL, - RC_COMPARE_FUNC_GREATER, - RC_COMPARE_FUNC_NOTEQUAL, - RC_COMPARE_FUNC_GEQUAL, - RC_COMPARE_FUNC_ALWAYS -} rc_compare_func; - -/** - * Coordinate wrapping modes. - * - * These are not quite the same as their GL counterparts yet. - */ -typedef enum { - RC_WRAP_NONE = 0, - RC_WRAP_REPEAT, - RC_WRAP_MIRRORED_REPEAT, - RC_WRAP_MIRRORED_CLAMP -} rc_wrap_mode; - -/** - * Stores state that influences the compilation of a fragment program. - */ -struct r300_fragment_program_external_state { - struct { - /** - * This field contains swizzle for some lowering passes - * (shadow comparison, unorm->snorm conversion) - */ - unsigned texture_swizzle:12; - - /** - * If the sampler is used as a shadow sampler, - * this field specifies the compare function. - * - * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). - * \sa rc_compare_func - */ - unsigned texture_compare_func : 3; - - /** - * No matter what the sampler type is, - * this field turns it into a shadow sampler. - */ - unsigned compare_mode_enabled : 1; - - /** - * If the sampler will receive non-normalized coords, - * this field is set. The scaling factor is given by - * RC_STATE_R300_TEXRECT_FACTOR. - */ - unsigned non_normalized_coords : 1; - - /** - * This field specifies wrapping modes for the sampler. - * - * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths - * will be performed on the coordinates. - */ - unsigned wrap_mode : 3; - - /** - * The coords are scaled after applying the wrap mode emulation - * and right before texture fetch. 
The scaling factor is given by - * RC_STATE_R300_TEXSCALE_FACTOR. */ - unsigned clamp_and_scale_before_fetch : 1; - - /** - * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM - * in the shader. - */ - unsigned convert_unorm_to_snorm:1; - } unit[16]; - - unsigned frag_clamp:1; -}; - - - -struct r300_fragment_program_node { - int tex_offset; /**< first tex instruction */ - int tex_end; /**< last tex instruction, relative to tex_offset */ - int alu_offset; /**< first ALU instruction */ - int alu_end; /**< last ALU instruction, relative to alu_offset */ - int flags; -}; - -/** - * Stores an R300 fragment program in its compiled-to-hardware form. - */ -struct r300_fragment_program_code { - struct { - unsigned int length; /**< total # of texture instructions used */ - uint32_t inst[R400_PFS_MAX_TEX_INST]; - } tex; - - struct { - unsigned int length; /**< total # of ALU instructions used */ - struct { - uint32_t rgb_inst; - uint32_t rgb_addr; - uint32_t alpha_inst; - uint32_t alpha_addr; - uint32_t r400_ext_addr; - } inst[R400_PFS_MAX_ALU_INST]; - } alu; - - uint32_t config; /* US_CONFIG */ - uint32_t pixsize; /* US_PIXSIZE */ - uint32_t code_offset; /* US_CODE_OFFSET */ - uint32_t r400_code_offset_ext; /* US_CODE_EXT */ - uint32_t code_addr[4]; /* US_CODE_ADDR */ - /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries - * for r400 cards */ - unsigned int r390_mode:1; -}; - - -struct r500_fragment_program_code { - struct { - uint32_t inst0; - uint32_t inst1; - uint32_t inst2; - uint32_t inst3; - uint32_t inst4; - uint32_t inst5; - } inst[R500_PFS_MAX_INST]; - - int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ - - int max_temp_idx; - - uint32_t us_fc_ctrl; - - uint32_t int_constants[32]; - uint32_t int_constant_count; -}; - -struct rX00_fragment_program_code { - union { - struct r300_fragment_program_code r300; - struct r500_fragment_program_code r500; - } code; - - unsigned writes_depth:1; - - struct 
rc_constant_list constants; - unsigned *constants_remap_table; -}; - - -#define R300_VS_MAX_ALU 256 -#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4) -#define R500_VS_MAX_ALU 1024 -#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) -#define R300_VS_MAX_TEMPS 32 -/* This is the max for all chipsets (r300-r500) */ -#define R300_VS_MAX_FC_OPS 16 -/* The r500 maximum depth is not just for loops, but any combination of loops - * and subroutine jumps. */ -#define R500_VS_MAX_FC_DEPTH 8 -#define R300_VS_MAX_LOOP_DEPTH 1 - -#define VSF_MAX_INPUTS 32 -#define VSF_MAX_OUTPUTS 32 - -struct r300_vertex_program_code { - int length; - union { - uint32_t d[R500_VS_MAX_ALU_DWORDS]; - float f[R500_VS_MAX_ALU_DWORDS]; - } body; - - int pos_end; - int num_temporaries; /* Number of temp vars used by program */ - int inputs[VSF_MAX_INPUTS]; - int outputs[VSF_MAX_OUTPUTS]; - - struct rc_constant_list constants; - unsigned *constants_remap_table; - - uint32_t InputsRead; - uint32_t OutputsWritten; - - unsigned int num_fc_ops; - uint32_t fc_ops; - union { - uint32_t r300[R300_VS_MAX_FC_OPS]; - struct { - uint32_t lw; - uint32_t uw; - } r500[R300_VS_MAX_FC_OPS]; - } fc_op_addrs; - int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; -}; - -#endif /* RADEON_CODE_H */ - diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c deleted file mode 100644 index b793672..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ /dev/null @@ -1,489 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to 
the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "radeon_compiler.h" - -#include -#include -#include - -#include "radeon_dataflow.h" -#include "radeon_program.h" -#include "radeon_program_pair.h" -#include "radeon_compiler_util.h" - - -void rc_init(struct radeon_compiler * c) -{ - memset(c, 0, sizeof(*c)); - - memory_pool_init(&c->Pool); - c->Program.Instructions.Prev = &c->Program.Instructions; - c->Program.Instructions.Next = &c->Program.Instructions; - c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; -} - -void rc_destroy(struct radeon_compiler * c) -{ - rc_constants_destroy(&c->Program.Constants); - memory_pool_destroy(&c->Pool); - free(c->ErrorMsg); -} - -void rc_debug(struct radeon_compiler * c, const char * fmt, ...) -{ - va_list ap; - - if (!(c->Debug & RC_DBG_LOG)) - return; - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - -void rc_error(struct radeon_compiler * c, const char * fmt, ...) 
-{ - va_list ap; - - c->Error = 1; - - if (!c->ErrorMsg) { - /* Only remember the first error */ - char buf[1024]; - int written; - - va_start(ap, fmt); - written = vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); - - if (written < sizeof(buf)) { - c->ErrorMsg = strdup(buf); - } else { - c->ErrorMsg = malloc(written + 1); - - va_start(ap, fmt); - vsnprintf(c->ErrorMsg, written + 1, fmt, ap); - va_end(ap); - } - } - - if (c->Debug & RC_DBG_LOG) { - fprintf(stderr, "r300compiler error: "); - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - } -} - -int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) -{ - rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); - return 1; -} - -/** - * Recompute c->Program.InputsRead and c->Program.OutputsWritten - * based on which inputs and outputs are actually referenced - * in program instructions. - */ -void rc_calculate_inputs_outputs(struct radeon_compiler * c) -{ - struct rc_instruction *inst; - - c->Program.InputsRead = 0; - c->Program.OutputsWritten = 0; - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) - { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - int i; - - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) - c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; - } - - if (opcode->HasDstReg) { - if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) - c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; - } - } -} - -/** - * Rewrite the program such that everything that source the given input - * register will source new_input instead. 
- */ -void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) -{ - struct rc_instruction * inst; - - c->Program.InputsRead &= ~(1 << input); - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned i; - - for(i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { - inst->U.I.SrcReg[i].File = new_input.File; - inst->U.I.SrcReg[i].Index = new_input.Index; - inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); - if (!inst->U.I.SrcReg[i].Abs) { - inst->U.I.SrcReg[i].Negate ^= new_input.Negate; - inst->U.I.SrcReg[i].Abs = new_input.Abs; - } - - c->Program.InputsRead |= 1 << new_input.Index; - } - } - } -} - - -/** - * Rewrite the program such that everything that writes into the given - * output register will instead write to new_output. The new_output - * writemask is honoured. - */ -void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) -{ - struct rc_instruction * inst; - - c->Program.OutputsWritten &= ~(1 << output); - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if (opcode->HasDstReg) { - if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { - inst->U.I.DstReg.Index = new_output; - inst->U.I.DstReg.WriteMask &= writemask; - - c->Program.OutputsWritten |= 1 << new_output; - } - } - } -} - - -/** - * Rewrite the program such that a given output is duplicated. 
- */ -void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) -{ - unsigned tempreg = rc_find_free_temporary(c); - struct rc_instruction * inst; - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if (opcode->HasDstReg) { - if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = tempreg; - } - } - } - - inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.DstReg.File = RC_FILE_OUTPUT; - inst->U.I.DstReg.Index = output; - - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = tempreg; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - - inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.DstReg.File = RC_FILE_OUTPUT; - inst->U.I.DstReg.Index = dup_output; - - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = tempreg; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - - c->Program.OutputsWritten |= 1 << dup_output; -} - - -/** - * Introduce standard code fragment to deal with fragment.position. 
- */ -void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, - int full_vtransform) -{ - unsigned tempregi = rc_find_free_temporary(c); - struct rc_instruction * inst_rcp; - struct rc_instruction * inst_mul; - struct rc_instruction * inst_mad; - struct rc_instruction * inst; - - c->Program.InputsRead &= ~(1 << wpos); - c->Program.InputsRead |= 1 << new_input; - - /* perspective divide */ - inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = tempregi; - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - - inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; - inst_rcp->U.I.SrcReg[0].Index = new_input; - inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - - inst_mul = rc_insert_new_instruction(c, inst_rcp); - inst_mul->U.I.Opcode = RC_OPCODE_MUL; - - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = tempregi; - inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; - - inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; - inst_mul->U.I.SrcReg[0].Index = new_input; - - inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mul->U.I.SrcReg[1].Index = tempregi; - inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - - /* viewport transformation */ - inst_mad = rc_insert_new_instruction(c, inst_mul); - inst_mad->U.I.Opcode = RC_OPCODE_MAD; - - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = tempregi; - inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; - - inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[0].Index = tempregi; - inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; - - inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; - - inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; - - if (full_vtransform) { - inst_mad->U.I.SrcReg[1].Index = 
rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); - inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); - } else { - inst_mad->U.I.SrcReg[1].Index = - inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); - } - - for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned i; - - for(i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && - inst->U.I.SrcReg[i].Index == wpos) { - inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[i].Index = tempregi; - } - } - } -} - - -/** - * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. - * Gallium and OpenGL define it the other way around. - * - * So let's just negate FACE at the beginning of the shader and rewrite the rest - * of the shader to read from the newly allocated temporary. 
- */ -void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) -{ - unsigned tempregi = rc_find_free_temporary(c); - struct rc_instruction *inst_add; - struct rc_instruction *inst; - - /* perspective divide */ - inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); - inst_add->U.I.Opcode = RC_OPCODE_ADD; - - inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_add->U.I.DstReg.Index = tempregi; - inst_add->U.I.DstReg.WriteMask = RC_MASK_X; - - inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; - inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; - - inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; - inst_add->U.I.SrcReg[1].Index = face; - inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; - inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; - - for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned i; - - for(i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && - inst->U.I.SrcReg[i].Index == face) { - inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[i].Index = tempregi; - } - } - } -} - -static void reg_count_callback(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) -{ - int *max_reg = userdata; - if (file == RC_FILE_TEMPORARY) - (int)index > *max_reg ? 
*max_reg = index : 0; -} - -void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) -{ - int max_reg = -1; - struct rc_instruction * tmp; - memset(s, 0, sizeof(*s)); - - for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; - tmp = tmp->Next){ - const struct rc_opcode_info * info; - rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); - if (tmp->Type == RC_INSTRUCTION_NORMAL) { - info = rc_get_opcode_info(tmp->U.I.Opcode); - if (info->Opcode == RC_OPCODE_BEGIN_TEX) - continue; - if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) - s->num_presub_ops++; - } else { - if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) - s->num_presub_ops++; - if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) - s->num_presub_ops++; - /* Assuming alpha will never be a flow control or - * a tex instruction. */ - if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) - s->num_alpha_insts++; - if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) - s->num_rgb_insts++; - info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); - } - if (info->IsFlowControl) - s->num_fc_insts++; - if (info->HasTexture) - s->num_tex_insts++; - s->num_insts++; - } - s->num_temp_regs = max_reg + 1; -} - -static void print_stats(struct radeon_compiler * c) -{ - struct rc_program_stats s; - - if (c->initial_num_insts <= 5) - return; - - rc_get_stats(c, &s); - - switch (c->type) { - case RC_VERTEX_PROGRAM: - fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" - "~%4u Instructions\n" - "~%4u Flow Control Instructions\n" - "~%4u Temporary Registers\n" - "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", - s.num_insts, s.num_fc_insts, s.num_temp_regs); - break; - - case RC_FRAGMENT_PROGRAM: - fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" - "~%4u Instructions\n" - "~%4u Vector Instructions (RGB)\n" - "~%4u Scalar Instructions (Alpha)\n" - "~%4u Flow Control Instructions\n" - "~%4u Texture Instructions\n" - "~%4u Presub Operations\n" - "~%4u Temporary Registers\n" - "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", - s.num_insts, 
s.num_rgb_insts, s.num_alpha_insts, - s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, - s.num_temp_regs); - break; - default: - assert(0); - } -} - -static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { - "Vertex Program", - "Fragment Program" -}; - -void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) -{ - for (unsigned i = 0; list[i].name; i++) { - if (list[i].predicate) { - list[i].run(c, list[i].user); - - if (c->Error) - return; - - if ((c->Debug & RC_DBG_LOG) && list[i].dump) { - fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); - rc_print_program(&c->Program); - } - } - } -} - -/* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) -{ - struct rc_program_stats s; - - rc_get_stats(c, &s); - c->initial_num_insts = s.num_insts; - - if (c->Debug & RC_DBG_LOG) { - fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); - rc_print_program(&c->Program); - } - - rc_run_compiler_passes(c, list); - - if (c->Debug & RC_DBG_STATS) - print_stats(c); -} - -void rc_validate_final_shader(struct radeon_compiler *c, void *user) -{ - /* Check the number of constants. */ - if (c->Program.Constants.Count > c->max_constants) { - rc_error(c, "Too many constants. 
Max: %i, Got: %i\n", - c->max_constants, c->Program.Constants.Count); - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h deleted file mode 100644 index 2d8e415..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef RADEON_COMPILER_H -#define RADEON_COMPILER_H - -#include "../../../../main/compiler.h" - -#include "memory_pool.h" -#include "radeon_code.h" -#include "radeon_program.h" -#include "radeon_emulate_loops.h" - -#define RC_DBG_LOG (1 << 0) -#define RC_DBG_STATS (1 << 1) - -struct rc_swizzle_caps; - -enum rc_program_type { - RC_VERTEX_PROGRAM, - RC_FRAGMENT_PROGRAM, - RC_NUM_PROGRAM_TYPES -}; - -struct radeon_compiler { - struct memory_pool Pool; - struct rc_program Program; - enum rc_program_type type; - unsigned Debug:2; - unsigned Error:1; - char * ErrorMsg; - - /* Hardware specification. */ - unsigned is_r400:1; - unsigned is_r500:1; - unsigned has_half_swizzles:1; - unsigned has_presub:1; - unsigned disable_optimizations:1; - unsigned max_temp_regs; - unsigned max_constants; - int max_alu_insts; - unsigned max_tex_insts; - - /* Whether to remove unused constants and empty holes in constant space. */ - unsigned remove_unused_constants:1; - - /** - * Variables used internally, not be touched by callers - * of the compiler - */ - /*@{*/ - struct rc_swizzle_caps * SwizzleCaps; - /*@}*/ - - struct emulate_loop_state loop_state; - - unsigned initial_num_insts; /* Number of instructions at start. */ -}; - -void rc_init(struct radeon_compiler * c); -void rc_destroy(struct radeon_compiler * c); - -void rc_debug(struct radeon_compiler * c, const char * fmt, ...); -void rc_error(struct radeon_compiler * c, const char * fmt, ...); - -int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); - -/** - * This macro acts like an if-statement that can be used to implement - * non-aborting assertions in the compiler. - * - * It checks whether \p cond is true. If not, an internal compiler error is - * flagged and the if-clause is run. 
- * - * A typical use-case would be: - * - * if (rc_assert(c, condition-that-must-be-true)) - * return; - */ -#define rc_assert(c, cond) \ - (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) - -void rc_calculate_inputs_outputs(struct radeon_compiler * c); - -void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); -void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); -void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); -void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, - int full_vtransform); -void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); - -struct r300_fragment_program_compiler { - struct radeon_compiler Base; - struct rX00_fragment_program_code *code; - /* Optional transformations and features. */ - struct r300_fragment_program_external_state state; - unsigned enable_shadow_ambient; - /* Register corresponding to the depthbuffer. */ - unsigned OutputDepth; - /* Registers corresponding to the four colorbuffers. */ - unsigned OutputColor[4]; - - void * UserData; - void (*AllocateHwInputs)( - struct r300_fragment_program_compiler * c, - void (*allocate)(void * data, unsigned input, unsigned hwreg), - void * mydata); -}; - -void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); - -struct r300_vertex_program_compiler { - struct radeon_compiler Base; - struct r300_vertex_program_code *code; - uint32_t RequiredOutputs; - - void * UserData; - void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); - - int PredicateIndex; - unsigned int PredicateMask; -}; - -void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); -void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user); - -struct radeon_compiler_pass { - const char *name; /* Name of the pass. 
*/ - int dump; /* Dump the program if Debug == 1? */ - int predicate; /* Run this pass? */ - void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */ - void *user; /* Optional parameter which is passed to the run function. */ -}; - -struct rc_program_stats { - unsigned num_insts; - unsigned num_fc_insts; - unsigned num_tex_insts; - unsigned num_rgb_insts; - unsigned num_alpha_insts; - unsigned num_presub_ops; - unsigned num_temp_regs; -}; - -void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); - -/* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list); -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list); -void rc_validate_final_shader(struct radeon_compiler *c, void *user); - -#endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c deleted file mode 100644 index 2742721..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ /dev/null @@ -1,701 +0,0 @@ -/* - * Copyright 2010 Tom Stellard - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * \file - */ - -#include "radeon_compiler_util.h" - -#include "radeon_compiler.h" -#include "radeon_dataflow.h" -/** - */ -unsigned int rc_swizzle_to_writemask(unsigned int swz) -{ - unsigned int mask = 0; - unsigned int i; - - for(i = 0; i < 4; i++) { - mask |= 1 << GET_SWZ(swz, i); - } - mask &= RC_MASK_XYZW; - - return mask; -} - -rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) -{ - if (idx & 0x4) - return idx; - return GET_SWZ(swz, idx); -} - -/** - * The purpose of this function is to standardize the number channels used by - * swizzles. All swizzles regardless of what instruction they are a part of - * should have 4 channels initialized with values. - * @param channels The number of channels in initial_value that have a - * meaningful value. - * @return An initialized swizzle that has all of the unused channels set to - * RC_SWIZZLE_UNUSED. 
- */ -unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) -{ - unsigned int i; - for (i = channels; i < 4; i++) { - SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); - } - return initial_value; -} - -unsigned int combine_swizzles4(unsigned int src, - rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) -{ - unsigned int ret = 0; - - ret |= get_swz(src, swz_x); - ret |= get_swz(src, swz_y) << 3; - ret |= get_swz(src, swz_z) << 6; - ret |= get_swz(src, swz_w) << 9; - - return ret; -} - -unsigned int combine_swizzles(unsigned int src, unsigned int swz) -{ - unsigned int ret = 0; - - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; - - return ret; -} - -/** - * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W - */ -rc_swizzle rc_mask_to_swizzle(unsigned int mask) -{ - switch (mask) { - case RC_MASK_X: return RC_SWIZZLE_X; - case RC_MASK_Y: return RC_SWIZZLE_Y; - case RC_MASK_Z: return RC_SWIZZLE_Z; - case RC_MASK_W: return RC_SWIZZLE_W; - } - return RC_SWIZZLE_UNUSED; -} - -/* Reorder mask bits according to swizzle. 
*/ -unsigned swizzle_mask(unsigned swizzle, unsigned mask) -{ - unsigned ret = 0; - for (unsigned chan = 0; chan < 4; ++chan) { - unsigned swz = GET_SWZ(swizzle, chan); - if (swz < 4) - ret |= GET_BIT(mask, swz) << chan; - } - return ret; -} - -static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) -{ - if (info->HasTexture) { - return 0; - } - switch (info->Opcode) { - case RC_OPCODE_DP2: - case RC_OPCODE_DP3: - case RC_OPCODE_DP4: - case RC_OPCODE_DDX: - case RC_OPCODE_DDY: - return 0; - default: - return 1; - } -} - -/** - * @return A swizzle the results from converting old_swizzle using - * conversion_swizzle - */ -unsigned int rc_adjust_channels( - unsigned int old_swizzle, - unsigned int conversion_swizzle) -{ - unsigned int i; - unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); - for (i = 0; i < 4; i++) { - unsigned int new_chan = get_swz(conversion_swizzle, i); - if (new_chan == RC_SWIZZLE_UNUSED) { - continue; - } - SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); - } - return new_swizzle; -} - -static unsigned int rewrite_writemask( - unsigned int old_mask, - unsigned int conversion_swizzle) -{ - unsigned int new_mask = 0; - unsigned int i; - - for (i = 0; i < 4; i++) { - if (!GET_BIT(old_mask, i) - || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { - continue; - } - new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); - } - - return new_mask; -} - -/** - * This function rewrites the writemask of sub and adjusts the swizzles - * of all its source registers based on the conversion_swizzle. - * conversion_swizzle represents a mapping of the old writemask to the - * new writemask. For a detailed description of how conversion swizzles - * work see rc_rewrite_swizzle(). 
- */ -void rc_pair_rewrite_writemask( - struct rc_pair_sub_instruction * sub, - unsigned int conversion_swizzle) -{ - const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); - unsigned int i; - - sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); - - if (!srcs_need_rewrite(info)) { - return ; - } - - for (i = 0; i < info->NumSrcRegs; i++) { - sub->Arg[i].Swizzle = - rc_adjust_channels(sub->Arg[i].Swizzle, - conversion_swizzle); - } -} - -static void normal_rewrite_writemask_cb( - void * userdata, - struct rc_instruction * inst, - struct rc_src_register * src) -{ - unsigned int * new_mask = (unsigned int *)userdata; - src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask); -} - -/** - * This function is the same as rc_pair_rewrite_writemask() except it - * operates on normal instructions. - */ -void rc_normal_rewrite_writemask( - struct rc_instruction * inst, - unsigned int conversion_swizzle) -{ - unsigned int new_mask; - struct rc_sub_instruction * sub = &inst->U.I; - const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); - sub->DstReg.WriteMask = - rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); - - if (info->HasTexture) { - unsigned int i; - assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); - for (i = 0; i < 4; i++) { - unsigned int swz = GET_SWZ(conversion_swizzle, i); - if (swz > 3) - continue; - SET_SWZ(sub->TexSwizzle, swz, i); - } - } - - if (!srcs_need_rewrite(info)) { - return; - } - - new_mask = sub->DstReg.WriteMask; - rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &new_mask); -} - -/** - * This function replaces each value 'swz' in swizzle with the value of - * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's - * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want - * to change all the Y's in swizzle to X, then conversion_swizzle should be - * _X__ (0xfc7). 
If you want to change the Y's to X and the X's to Y, then - * conversion swizzle should be YX__ (0xfc1). - * @param swizzle The swizzle to change - * @param conversion_swizzle Describes the conversion to perform on the swizzle - * @return A converted swizzle - */ -unsigned int rc_rewrite_swizzle( - unsigned int swizzle, - unsigned int conversion_swizzle) -{ - unsigned int chan; - unsigned int out_swizzle = swizzle; - - for (chan = 0; chan < 4; chan++) { - unsigned int swz = GET_SWZ(swizzle, chan); - unsigned int new_swz; - if (swz > 3) { - SET_SWZ(out_swizzle, chan, swz); - } else { - new_swz = GET_SWZ(conversion_swizzle, swz); - if (new_swz != RC_SWIZZLE_UNUSED) { - SET_SWZ(out_swizzle, chan, new_swz); - } else { - SET_SWZ(out_swizzle, chan, swz); - } - } - } - return out_swizzle; -} - -/** - * Left multiplication of a register with a swizzle - */ -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) -{ - struct rc_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.Negate = 0; - for(i = 0; i < 4; ++i) { - rc_swizzle swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; -} - -void reset_srcreg(struct rc_src_register* reg) -{ - memset(reg, 0, sizeof(struct rc_src_register)); - reg->Swizzle = RC_SWIZZLE_XYZW; -} - -unsigned int rc_src_reads_dst_mask( - rc_register_file src_file, - unsigned int src_idx, - unsigned int src_swz, - rc_register_file dst_file, - unsigned int dst_idx, - unsigned int dst_mask) -{ - if (src_file != dst_file || src_idx != dst_idx) { - return RC_MASK_NONE; - } - return dst_mask & rc_swizzle_to_writemask(src_swz); -} - -/** - * @return A bit mask specifying whether this swizzle will select from an RGB - * source, an Alpha source, or both. 
- */ -unsigned int rc_source_type_swz(unsigned int swizzle) -{ - unsigned int chan; - unsigned int swz = RC_SWIZZLE_UNUSED; - unsigned int ret = RC_SOURCE_NONE; - - for(chan = 0; chan < 4; chan++) { - swz = GET_SWZ(swizzle, chan); - if (swz == RC_SWIZZLE_W) { - ret |= RC_SOURCE_ALPHA; - } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y - || swz == RC_SWIZZLE_Z) { - ret |= RC_SOURCE_RGB; - } - } - return ret; -} - -unsigned int rc_source_type_mask(unsigned int mask) -{ - unsigned int ret = RC_SOURCE_NONE; - - if (mask & RC_MASK_XYZ) - ret |= RC_SOURCE_RGB; - - if (mask & RC_MASK_W) - ret |= RC_SOURCE_ALPHA; - - return ret; -} - -struct src_select { - rc_register_file File; - int Index; - unsigned int SrcType; -}; - -struct can_use_presub_data { - struct src_select Selects[5]; - unsigned int SelectCount; - const struct rc_src_register * ReplaceReg; - unsigned int ReplaceRemoved; -}; - -static void can_use_presub_data_add_select( - struct can_use_presub_data * data, - rc_register_file file, - unsigned int index, - unsigned int src_type) -{ - struct src_select * select; - - select = &data->Selects[data->SelectCount++]; - select->File = file; - select->Index = index; - select->SrcType = src_type; -} - -/** - * This callback function counts the number of sources in inst that are - * different from the sources in can_use_presub_data->RemoveSrcs. 
- */ -static void can_use_presub_read_cb( - void * userdata, - struct rc_instruction * inst, - struct rc_src_register * src) -{ - struct can_use_presub_data * d = userdata; - - if (!d->ReplaceRemoved && src == d->ReplaceReg) { - d->ReplaceRemoved = 1; - return; - } - - if (src->File == RC_FILE_NONE) - return; - - can_use_presub_data_add_select(d, src->File, src->Index, - rc_source_type_swz(src->Swizzle)); -} - -unsigned int rc_inst_can_use_presub( - struct rc_instruction * inst, - rc_presubtract_op presub_op, - unsigned int presub_writemask, - const struct rc_src_register * replace_reg, - const struct rc_src_register * presub_src0, - const struct rc_src_register * presub_src1) -{ - struct can_use_presub_data d; - unsigned int num_presub_srcs; - unsigned int i; - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - int rgb_count = 0, alpha_count = 0; - unsigned int src_type0, src_type1; - - if (presub_op == RC_PRESUB_NONE) { - return 1; - } - - if (info->HasTexture) { - return 0; - } - - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. 
- * XXX For now we will limit instructions to only one presubtract - * value.*/ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { - return 0; - } - - memset(&d, 0, sizeof(d)); - d.ReplaceReg = replace_reg; - - rc_for_all_reads_src(inst, can_use_presub_read_cb, &d); - - num_presub_srcs = rc_presubtract_src_reg_count(presub_op); - - src_type0 = rc_source_type_swz(presub_src0->Swizzle); - can_use_presub_data_add_select(&d, - presub_src0->File, - presub_src0->Index, - src_type0); - - if (num_presub_srcs > 1) { - src_type1 = rc_source_type_swz(presub_src1->Swizzle); - can_use_presub_data_add_select(&d, - presub_src1->File, - presub_src1->Index, - src_type1); - - /* Even if both of the presub sources read from the same - * register, we still need to use 2 different source selects - * for them, so we need to increment the count to compensate. - */ - if (presub_src0->File == presub_src1->File - && presub_src0->Index == presub_src1->Index) { - if (src_type0 & src_type1 & RC_SOURCE_RGB) { - rgb_count++; - } - if (src_type0 & src_type1 & RC_SOURCE_ALPHA) { - alpha_count++; - } - } - } - - /* Count the number of source selects for Alpha and RGB. If we - * encounter two of the same source selects then we can ignore the - * first one. 
*/ - for (i = 0; i < d.SelectCount; i++) { - unsigned int j; - unsigned int src_type = d.Selects[i].SrcType; - for (j = i + 1; j < d.SelectCount; j++) { - if (d.Selects[i].File == d.Selects[j].File - && d.Selects[i].Index == d.Selects[j].Index) { - src_type &= ~d.Selects[j].SrcType; - } - } - if (src_type & RC_SOURCE_RGB) { - rgb_count++; - } - - if (src_type & RC_SOURCE_ALPHA) { - alpha_count++; - } - } - - if (rgb_count > 3 || alpha_count > 3) { - return 0; - } - - return 1; -} - -struct max_data { - unsigned int Max; - unsigned int HasFileType; - rc_register_file File; -}; - -static void max_callback( - void * userdata, - struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) -{ - struct max_data * d = (struct max_data*)userdata; - if (file == d->File && (!d->HasFileType || index > d->Max)) { - d->Max = index; - d->HasFileType = 1; - } -} - -/** - * @return The maximum index of the specified register file used by the - * program. - */ -int rc_get_max_index( - struct radeon_compiler * c, - rc_register_file file) -{ - struct max_data data; - struct rc_instruction * inst; - data.Max = 0; - data.HasFileType = 0; - data.File = file; - for (inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - rc_for_all_reads_mask(inst, max_callback, &data); - rc_for_all_writes_mask(inst, max_callback, &data); - } - if (!data.HasFileType) { - return -1; - } else { - return data.Max; - } -} - -static unsigned int get_source_readmask( - struct rc_pair_sub_instruction * sub, - unsigned int source, - unsigned int src_type) -{ - unsigned int i; - unsigned int readmask = 0; - const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); - - for (i = 0; i < info->NumSrcRegs; i++) { - if (sub->Arg[i].Source != source - || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) { - continue; - } - readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle); - } - return readmask; -} - -/** - * This 
function attempts to remove a source from a pair instructions. - * @param inst - * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd - * @param source The index of the source to remove - * @param new_readmask A mask representing the components that are read by - * the source that is intended to replace the one you are removing. If you - * want to remove a source only and not replace it, this parameter should be - * zero. - * @return 1 if the source was successfully removed, 0 if it was not - */ -unsigned int rc_pair_remove_src( - struct rc_instruction * inst, - unsigned int src_type, - unsigned int source, - unsigned int new_readmask) -{ - unsigned int readmask = 0; - - readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type); - readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type); - - if ((new_readmask & readmask) != readmask) - return 0; - - if (src_type & RC_SOURCE_RGB) { - memset(&inst->U.P.RGB.Src[source], 0, - sizeof(struct rc_pair_instruction_source)); - } - - if (src_type & RC_SOURCE_ALPHA) { - memset(&inst->U.P.Alpha.Src[source], 0, - sizeof(struct rc_pair_instruction_source)); - } - - return 1; -} - -/** - * @return RC_OPCODE_NOOP if inst is not a flow control instruction. - * @return The opcode of inst if it is a flow control instruction. - */ -rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst) -{ - const struct rc_opcode_info * info; - if (inst->Type == RC_INSTRUCTION_NORMAL) { - info = rc_get_opcode_info(inst->U.I.Opcode); - } else { - info = rc_get_opcode_info(inst->U.P.RGB.Opcode); - /*A flow control instruction shouldn't have an alpha - * instruction.*/ - assert(!info->IsFlowControl || - inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); - } - - if (info->IsFlowControl) - return info->Opcode; - else - return RC_OPCODE_NOP; - -} - -/** - * @return The BGNLOOP instruction that starts the loop ended by endloop. 
- */ -struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop) -{ - unsigned int endloop_count = 0; - struct rc_instruction * inst; - for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) { - rc_opcode op = rc_get_flow_control_inst(inst); - if (op == RC_OPCODE_ENDLOOP) { - endloop_count++; - } else if (op == RC_OPCODE_BGNLOOP) { - if (endloop_count == 0) { - return inst; - } else { - endloop_count--; - } - } - } - return NULL; -} - -/** - * @return The ENDLOOP instruction that ends the loop started by bgnloop. - */ -struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop) -{ - unsigned int bgnloop_count = 0; - struct rc_instruction * inst; - for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { - rc_opcode op = rc_get_flow_control_inst(inst); - if (op == RC_OPCODE_BGNLOOP) { - bgnloop_count++; - } else if (op == RC_OPCODE_ENDLOOP) { - if (bgnloop_count == 0) { - return inst; - } else { - bgnloop_count--; - } - } - } - return NULL; -} - -/** - * @return A conversion swizzle for converting from old_mask->new_mask - */ -unsigned int rc_make_conversion_swizzle( - unsigned int old_mask, - unsigned int new_mask) -{ - unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); - unsigned int old_idx; - unsigned int new_idx = 0; - for (old_idx = 0; old_idx < 4; old_idx++) { - if (!GET_BIT(old_mask, old_idx)) - continue; - for ( ; new_idx < 4; new_idx++) { - if (GET_BIT(new_mask, new_idx)) { - SET_SWZ(conversion_swizzle, old_idx, new_idx); - new_idx++; - break; - } - } - } - return conversion_swizzle; -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h deleted file mode 100644 index 3730aa8..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ /dev/null @@ -1,89 +0,0 @@ -#include "radeon_program_constants.h" - -#ifndef RADEON_PROGRAM_UTIL_H -#define RADEON_PROGRAM_UTIL_H - -#include "radeon_opcodes.h" - 
-struct radeon_compiler; -struct rc_instruction; -struct rc_pair_instruction; -struct rc_pair_sub_instruction; -struct rc_src_register; - -unsigned int rc_swizzle_to_writemask(unsigned int swz); - -rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); - -unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); - -unsigned int combine_swizzles4(unsigned int src, - rc_swizzle swz_x, rc_swizzle swz_y, - rc_swizzle swz_z, rc_swizzle swz_w); - -unsigned int combine_swizzles(unsigned int src, unsigned int swz); - -rc_swizzle rc_mask_to_swizzle(unsigned int mask); - -unsigned swizzle_mask(unsigned swizzle, unsigned mask); - -unsigned int rc_adjust_channels( - unsigned int old_swizzle, - unsigned int conversion_swizzle); - -void rc_pair_rewrite_writemask( - struct rc_pair_sub_instruction * sub, - unsigned int conversion_swizzle); - -void rc_normal_rewrite_writemask( - struct rc_instruction * inst, - unsigned int conversion_swizzle); - -unsigned int rc_rewrite_swizzle( - unsigned int swizzle, - unsigned int new_mask); - -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); - -void reset_srcreg(struct rc_src_register* reg); - -unsigned int rc_src_reads_dst_mask( - rc_register_file src_file, - unsigned int src_idx, - unsigned int src_swz, - rc_register_file dst_file, - unsigned int dst_idx, - unsigned int dst_mask); - -unsigned int rc_source_type_swz(unsigned int swizzle); - -unsigned int rc_source_type_mask(unsigned int mask); - -unsigned int rc_inst_can_use_presub( - struct rc_instruction * inst, - rc_presubtract_op presub_op, - unsigned int presub_writemask, - const struct rc_src_register * replace_reg, - const struct rc_src_register * presub_src0, - const struct rc_src_register * presub_src1); - -int rc_get_max_index( - struct radeon_compiler * c, - rc_register_file file); - -unsigned int rc_pair_remove_src( - struct rc_instruction * inst, - unsigned int src_type, - unsigned int source, - unsigned int 
new_readmask); - -rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst); - -struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop); -struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop); - -unsigned int rc_make_conversion_swizzle( - unsigned int old_mask, - unsigned int new_mask); - -#endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c deleted file mode 100644 index a8decac..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ /dev/null @@ -1,892 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * Copyright 2010 Tom Stellard - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "radeon_dataflow.h" - -#include "radeon_compiler.h" -#include "radeon_compiler_util.h" -#include "radeon_program.h" - -struct read_write_mask_data { - void * UserData; - rc_read_write_mask_fn Cb; -}; - -static void reads_normal_callback( - void * userdata, - struct rc_instruction * fullinst, - struct rc_src_register * src) -{ - struct read_write_mask_data * cb_data = userdata; - unsigned int refmask = 0; - unsigned int chan; - for(chan = 0; chan < 4; chan++) { - refmask |= 1 << GET_SWZ(src->Swizzle, chan); - } - refmask &= RC_MASK_XYZW; - - if (refmask) { - cb_data->Cb(cb_data->UserData, fullinst, src->File, - src->Index, refmask); - } - - if (refmask && src->RelAddr) { - cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0, - RC_MASK_X); - } -} - -static void pair_get_src_refmasks(unsigned int * refmasks, - struct rc_pair_instruction * inst, - unsigned int swz, unsigned int src) -{ - if (swz >= 4) - return; - - if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { - if(src == RC_PAIR_PRESUB_SRC) { - unsigned int i; - int srcp_regs = - rc_presubtract_src_reg_count( - inst->RGB.Src[src].Index); - for(i = 0; i < srcp_regs; i++) { - refmasks[i] |= 1 << swz; - } - } - else { - refmasks[src] |= 1 << swz; - } - } - - if (swz == RC_SWIZZLE_W) { - if (src == RC_PAIR_PRESUB_SRC) { - unsigned int i; - int srcp_regs = rc_presubtract_src_reg_count( - inst->Alpha.Src[src].Index); - for(i = 0; i < srcp_regs; i++) { - refmasks[i] |= 1 << swz; - } - } - else { - refmasks[src] |= 1 << swz; - } - } -} - -static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) -{ - struct rc_pair_instruction * inst = &fullinst->U.P; - unsigned int refmasks[3] = { 0, 0, 0 }; - - unsigned int arg; - - for(arg = 0; arg < 3; ++arg) { - unsigned int chan; - for(chan = 0; chan < 3; ++chan) { - unsigned int swz_rgb = - GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); - unsigned int swz_alpha = - 
GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan); - pair_get_src_refmasks(refmasks, inst, swz_rgb, - inst->RGB.Arg[arg].Source); - pair_get_src_refmasks(refmasks, inst, swz_alpha, - inst->Alpha.Arg[arg].Source); - } - } - - for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) - cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, - refmasks[src] & RC_MASK_XYZ); - - if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) - cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); - } -} - -static void pair_sub_for_all_args( - struct rc_instruction * fullinst, - struct rc_pair_sub_instruction * sub, - rc_pair_read_arg_fn cb, - void * userdata) -{ - int i; - const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); - - for(i = 0; i < info->NumSrcRegs; i++) { - unsigned int src_type; - - src_type = rc_source_type_swz(sub->Arg[i].Swizzle); - - if (src_type == RC_SOURCE_NONE) - continue; - - if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { - unsigned int presub_type; - unsigned int presub_src_count; - struct rc_pair_instruction_source * src_array; - unsigned int j; - - if (src_type & RC_SOURCE_RGB) { - presub_type = fullinst-> - U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; - src_array = fullinst->U.P.RGB.Src; - } else { - presub_type = fullinst-> - U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; - src_array = fullinst->U.P.Alpha.Src; - } - presub_src_count - = rc_presubtract_src_reg_count(presub_type); - for(j = 0; j < presub_src_count; j++) { - cb(userdata, fullinst, &sub->Arg[i], - &src_array[j]); - } - } else { - struct rc_pair_instruction_source * src = - rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); - if (src) { - cb(userdata, fullinst, &sub->Arg[i], src); - } - } - } -} - -/* This function calls the callback function (cb) for each source used by - * the instruction. 
- * */ -void rc_for_all_reads_src( - struct rc_instruction * inst, - rc_read_src_fn cb, - void * userdata) -{ - const struct rc_opcode_info * opcode = - rc_get_opcode_info(inst->U.I.Opcode); - - /* This function only works with normal instructions. */ - if (inst->Type != RC_INSTRUCTION_NORMAL) { - assert(0); - return; - } - - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - - if (inst->U.I.SrcReg[src].File == RC_FILE_NONE) - continue; - - if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) { - unsigned int i; - unsigned int srcp_regs = rc_presubtract_src_reg_count( - inst->U.I.PreSub.Opcode); - for( i = 0; i < srcp_regs; i++) { - cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]); - } - } else { - cb(userdata, inst, &inst->U.I.SrcReg[src]); - } - } -} - -/** - * This function calls the callback function (cb) for each arg of the RGB and - * alpha components. - */ -void rc_pair_for_all_reads_arg( - struct rc_instruction * inst, - rc_pair_read_arg_fn cb, - void * userdata) -{ - /* This function only works with pair instructions. */ - if (inst->Type != RC_INSTRUCTION_PAIR) { - assert(0); - return; - } - - pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata); - pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata); -} - -/** - * Calls a callback function for all register reads. - * - * This is conservative, i.e. if the same register is referenced multiple times, - * the callback may also be called multiple times. - * Also, the writemask of the instruction is not taken into account. 
- */ -void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) -{ - if (inst->Type == RC_INSTRUCTION_NORMAL) { - struct read_write_mask_data cb_data; - cb_data.UserData = userdata; - cb_data.Cb = cb; - - rc_for_all_reads_src(inst, reads_normal_callback, &cb_data); - } else { - reads_pair(inst, cb, userdata); - } -} - - - -static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) -{ - struct rc_sub_instruction * inst = &fullinst->U.I; - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - - if (opcode->HasDstReg && inst->DstReg.WriteMask) - cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); - - if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); -} - -static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) -{ - struct rc_pair_instruction * inst = &fullinst->U.P; - - if (inst->RGB.WriteMask) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); - - if (inst->Alpha.WriteMask) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); - - if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); -} - -/** - * Calls a callback function for all register writes in the instruction, - * reporting writemasks to the callback function. - * - * \warning Does not report output registers for paired instructions! 
- */ -void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) -{ - if (inst->Type == RC_INSTRUCTION_NORMAL) { - writes_normal(inst, cb, userdata); - } else { - writes_pair(inst, cb, userdata); - } -} - - -struct mask_to_chan_data { - void * UserData; - rc_read_write_chan_fn Fn; -}; - -static void mask_to_chan_cb(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) -{ - struct mask_to_chan_data * d = data; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(mask, chan)) - d->Fn(d->UserData, inst, file, index, chan); - } -} - -/** - * Calls a callback function for all sourced register channels. - * - * This is conservative, i.e. channels may be called multiple times, - * and the writemask of the instruction is not taken into account. - */ -void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) -{ - struct mask_to_chan_data d; - d.UserData = userdata; - d.Fn = cb; - rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); -} - -/** - * Calls a callback function for all written register channels. - * - * \warning Does not report output registers for paired instructions! 
- */ -void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) -{ - struct mask_to_chan_data d; - d.UserData = userdata; - d.Fn = cb; - rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); -} - -static void remap_normal_instruction(struct rc_instruction * fullinst, - rc_remap_register_fn cb, void * userdata) -{ - struct rc_sub_instruction * inst = &fullinst->U.I; - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - unsigned int remapped_presub = 0; - - if (opcode->HasDstReg) { - rc_register_file file = inst->DstReg.File; - unsigned int index = inst->DstReg.Index; - - cb(userdata, fullinst, &file, &index); - - inst->DstReg.File = file; - inst->DstReg.Index = index; - } - - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - rc_register_file file = inst->SrcReg[src].File; - unsigned int index = inst->SrcReg[src].Index; - - if (file == RC_FILE_PRESUB) { - unsigned int i; - unsigned int srcp_srcs = rc_presubtract_src_reg_count( - inst->PreSub.Opcode); - /* Make sure we only remap presubtract sources once in - * case more than one source register reads the - * presubtract result. 
*/ - if (remapped_presub) - continue; - - for(i = 0; i < srcp_srcs; i++) { - file = inst->PreSub.SrcReg[i].File; - index = inst->PreSub.SrcReg[i].Index; - cb(userdata, fullinst, &file, &index); - inst->PreSub.SrcReg[i].File = file; - inst->PreSub.SrcReg[i].Index = index; - } - remapped_presub = 1; - } - else { - cb(userdata, fullinst, &file, &index); - - inst->SrcReg[src].File = file; - inst->SrcReg[src].Index = index; - } - } -} - -static void remap_pair_instruction(struct rc_instruction * fullinst, - rc_remap_register_fn cb, void * userdata) -{ - struct rc_pair_instruction * inst = &fullinst->U.P; - - if (inst->RGB.WriteMask) { - rc_register_file file = RC_FILE_TEMPORARY; - unsigned int index = inst->RGB.DestIndex; - - cb(userdata, fullinst, &file, &index); - - inst->RGB.DestIndex = index; - } - - if (inst->Alpha.WriteMask) { - rc_register_file file = RC_FILE_TEMPORARY; - unsigned int index = inst->Alpha.DestIndex; - - cb(userdata, fullinst, &file, &index); - - inst->Alpha.DestIndex = index; - } - - for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - rc_register_file file = inst->RGB.Src[src].File; - unsigned int index = inst->RGB.Src[src].Index; - - cb(userdata, fullinst, &file, &index); - - inst->RGB.Src[src].File = file; - inst->RGB.Src[src].Index = index; - } - - if (inst->Alpha.Src[src].Used) { - rc_register_file file = inst->Alpha.Src[src].File; - unsigned int index = inst->Alpha.Src[src].Index; - - cb(userdata, fullinst, &file, &index); - - inst->Alpha.Src[src].File = file; - inst->Alpha.Src[src].Index = index; - } - } -} - - -/** - * Remap all register accesses according to the given function. - * That is, call the function \p cb for each referenced register (both read and written) - * and update the given instruction \p inst accordingly - * if it modifies its \ref pfile and \ref pindex contents. 
- */ -void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) -{ - if (inst->Type == RC_INSTRUCTION_NORMAL) - remap_normal_instruction(inst, cb, userdata); - else - remap_pair_instruction(inst, cb, userdata); -} - -struct branch_write_mask { - unsigned int IfWriteMask:4; - unsigned int ElseWriteMask:4; - unsigned int HasElse:1; -}; - -union get_readers_read_cb { - rc_read_src_fn I; - rc_pair_read_arg_fn P; -}; - -struct get_readers_callback_data { - struct radeon_compiler * C; - struct rc_reader_data * ReaderData; - rc_read_src_fn ReadNormalCB; - rc_pair_read_arg_fn ReadPairCB; - rc_read_write_mask_fn WriteCB; - rc_register_file DstFile; - unsigned int DstIndex; - unsigned int DstMask; - unsigned int AliveWriteMask; - /* For convenience, this is indexed starting at 1 */ - struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; -}; - -static struct rc_reader * add_reader( - struct memory_pool * pool, - struct rc_reader_data * data, - struct rc_instruction * inst, - unsigned int mask) -{ - struct rc_reader * new; - memory_pool_array_reserve(pool, struct rc_reader, data->Readers, - data->ReaderCount, data->ReadersReserved, 1); - new = &data->Readers[data->ReaderCount++]; - new->Inst = inst; - new->WriteMask = mask; - return new; -} - -static void add_reader_normal( - struct memory_pool * pool, - struct rc_reader_data * data, - struct rc_instruction * inst, - unsigned int mask, - struct rc_src_register * src) -{ - struct rc_reader * new = add_reader(pool, data, inst, mask); - new->U.I.Src = src; -} - - -static void add_reader_pair( - struct memory_pool * pool, - struct rc_reader_data * data, - struct rc_instruction * inst, - unsigned int mask, - struct rc_pair_instruction_arg * arg, - struct rc_pair_instruction_source * src) -{ - struct rc_reader * new = add_reader(pool, data, inst, mask); - new->U.P.Src = src; - new->U.P.Arg = arg; -} - -static unsigned int get_readers_read_callback( - struct 
get_readers_callback_data * cb_data, - unsigned int has_rel_addr, - rc_register_file file, - unsigned int index, - unsigned int swizzle) -{ - unsigned int shared_mask, read_mask; - - if (has_rel_addr) { - cb_data->ReaderData->Abort = 1; - return RC_MASK_NONE; - } - - shared_mask = rc_src_reads_dst_mask(file, index, swizzle, - cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); - - if (shared_mask == RC_MASK_NONE) - return shared_mask; - - /* If we make it this far, it means that this source reads from the - * same register written to by d->ReaderData->Writer. */ - - read_mask = rc_swizzle_to_writemask(swizzle); - if (cb_data->ReaderData->AbortOnRead & read_mask) { - cb_data->ReaderData->Abort = 1; - return shared_mask; - } - - if (cb_data->ReaderData->LoopDepth > 0) { - cb_data->ReaderData->AbortOnWrite |= - (read_mask & cb_data->AliveWriteMask); - } - - /* XXX The behavior in this case should be configurable. */ - if ((read_mask & cb_data->AliveWriteMask) != read_mask) { - cb_data->ReaderData->Abort = 1; - return shared_mask; - } - - return shared_mask; -} - -static void get_readers_pair_read_callback( - void * userdata, - struct rc_instruction * inst, - struct rc_pair_instruction_arg * arg, - struct rc_pair_instruction_source * src) -{ - unsigned int shared_mask; - struct get_readers_callback_data * d = userdata; - - shared_mask = get_readers_read_callback(d, - 0 /*Pair Instructions don't use RelAddr*/, - src->File, src->Index, arg->Swizzle); - - if (shared_mask == RC_MASK_NONE) - return; - - if (d->ReadPairCB) - d->ReadPairCB(d->ReaderData, inst, arg, src); - - if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) - return; - - add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src); -} - -/** - * This function is used by rc_get_readers_normal() to determine whether inst - * is a reader of userdata->ReaderData->Writer - */ -static void get_readers_normal_read_callback( - void * userdata, - struct rc_instruction * inst, - struct 
rc_src_register * src) -{ - struct get_readers_callback_data * d = userdata; - unsigned int shared_mask; - - shared_mask = get_readers_read_callback(d, - src->RelAddr, src->File, src->Index, src->Swizzle); - - if (shared_mask == RC_MASK_NONE) - return; - /* The callback function could potentially clear d->ReaderData->Abort, - * so we need to call it before we return. */ - if (d->ReadNormalCB) - d->ReadNormalCB(d->ReaderData, inst, src); - - if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) - return; - - add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src); -} - -/** - * This function is used by rc_get_readers_normal() to determine when - * userdata->ReaderData->Writer is dead (i. e. All compontents of its - * destination register have been overwritten by other instructions). - */ -static void get_readers_write_callback( - void *userdata, - struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) -{ - struct get_readers_callback_data * d = userdata; - - if (index == d->DstIndex && file == d->DstFile) { - unsigned int shared_mask = mask & d->DstMask; - d->ReaderData->AbortOnRead &= ~shared_mask; - d->AliveWriteMask &= ~shared_mask; - if (d->ReaderData->AbortOnWrite & shared_mask) { - d->ReaderData->Abort = 1; - } - } - - if(d->WriteCB) - d->WriteCB(d->ReaderData, inst, file, index, mask); -} - -static void push_branch_mask( - struct get_readers_callback_data * d, - unsigned int * branch_depth) -{ - (*branch_depth)++; - if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { - d->ReaderData->Abort = 1; - return; - } - d->BranchMasks[*branch_depth].IfWriteMask = - d->AliveWriteMask; -} - -static void pop_branch_mask( - struct get_readers_callback_data * d, - unsigned int * branch_depth) -{ - struct branch_write_mask * masks = &d->BranchMasks[*branch_depth]; - - if (masks->HasElse) { - /* Abort on read for components that were written in the IF - * block. 
*/ - d->ReaderData->AbortOnRead |= - masks->IfWriteMask & ~masks->ElseWriteMask; - /* Abort on read for components that were written in the ELSE - * block. */ - d->ReaderData->AbortOnRead |= - masks->ElseWriteMask & ~d->AliveWriteMask; - - d->AliveWriteMask = masks->IfWriteMask - ^ ((masks->IfWriteMask ^ masks->ElseWriteMask) - & (masks->IfWriteMask ^ d->AliveWriteMask)); - } else { - d->ReaderData->AbortOnRead |= - masks->IfWriteMask & ~d->AliveWriteMask; - d->AliveWriteMask = masks->IfWriteMask; - - } - memset(masks, 0, sizeof(struct branch_write_mask)); - (*branch_depth)--; -} - -static void get_readers_for_single_write( - void * userdata, - struct rc_instruction * writer, - rc_register_file dst_file, - unsigned int dst_index, - unsigned int dst_mask) -{ - struct rc_instruction * tmp; - unsigned int branch_depth = 0; - struct rc_instruction * endloop = NULL; - unsigned int abort_on_read_at_endloop = 0; - struct get_readers_callback_data * d = userdata; - - d->ReaderData->Writer = writer; - d->ReaderData->AbortOnRead = 0; - d->ReaderData->AbortOnWrite = 0; - d->ReaderData->LoopDepth = 0; - d->ReaderData->InElse = 0; - d->DstFile = dst_file; - d->DstIndex = dst_index; - d->DstMask = dst_mask; - d->AliveWriteMask = dst_mask; - memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); - - if (!dst_mask) - return; - - for(tmp = writer->Next; tmp != &d->C->Program.Instructions; - tmp = tmp->Next){ - rc_opcode opcode = rc_get_flow_control_inst(tmp); - switch(opcode) { - case RC_OPCODE_BGNLOOP: - d->ReaderData->LoopDepth++; - push_branch_mask(d, &branch_depth); - break; - case RC_OPCODE_ENDLOOP: - if (d->ReaderData->LoopDepth > 0) { - d->ReaderData->LoopDepth--; - if (d->ReaderData->LoopDepth == 0) { - d->ReaderData->AbortOnWrite = 0; - } - pop_branch_mask(d, &branch_depth); - } else { - /* Here we have reached an ENDLOOP without - * seeing its BGNLOOP. These means that - * the writer was written inside of a loop, - * so it could have readers that are above it - * (i.e. 
they have a lower IP). To find these - * readers we jump to the BGNLOOP instruction - * and check each instruction until we get - * back to the writer. - */ - endloop = tmp; - tmp = rc_match_endloop(tmp); - if (!tmp) { - rc_error(d->C, "Failed to match endloop.\n"); - d->ReaderData->Abort = 1; - return; - } - abort_on_read_at_endloop = d->ReaderData->AbortOnRead; - d->ReaderData->AbortOnRead |= d->AliveWriteMask; - continue; - } - break; - case RC_OPCODE_IF: - push_branch_mask(d, &branch_depth); - break; - case RC_OPCODE_ELSE: - if (branch_depth == 0) { - d->ReaderData->InElse = 1; - } else { - unsigned int temp_mask = d->AliveWriteMask; - d->AliveWriteMask = - d->BranchMasks[branch_depth].IfWriteMask; - d->BranchMasks[branch_depth].ElseWriteMask = - temp_mask; - d->BranchMasks[branch_depth].HasElse = 1; - } - break; - case RC_OPCODE_ENDIF: - if (branch_depth == 0) { - d->ReaderData->AbortOnRead = d->AliveWriteMask; - d->ReaderData->InElse = 0; - } - else { - pop_branch_mask(d, &branch_depth); - } - break; - default: - break; - } - - if (d->ReaderData->InElse) - continue; - - if (tmp->Type == RC_INSTRUCTION_NORMAL) { - rc_for_all_reads_src(tmp, - get_readers_normal_read_callback, d); - } else { - rc_pair_for_all_reads_arg(tmp, - get_readers_pair_read_callback, d); - } - - /* This can happen when we jump from an ENDLOOP to BGNLOOP */ - if (tmp == writer) { - tmp = endloop; - endloop = NULL; - d->ReaderData->AbortOnRead = abort_on_read_at_endloop; - continue; - } - rc_for_all_writes_mask(tmp, get_readers_write_callback, d); - - if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort) - return; - - if (branch_depth == 0 && !d->AliveWriteMask) - return; - } -} - -static void init_get_readers_callback_data( - struct get_readers_callback_data * d, - struct rc_reader_data * reader_data, - struct radeon_compiler * c, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb) -{ - reader_data->Abort = 0; - reader_data->ReaderCount 
= 0; - reader_data->ReadersReserved = 0; - reader_data->Readers = NULL; - - d->C = c; - d->ReaderData = reader_data; - d->ReadNormalCB = read_normal_cb; - d->ReadPairCB = read_pair_cb; - d->WriteCB = write_cb; -} - -/** - * This function will create a list of readers via the rc_reader_data struct. - * This function will abort (set the flag data->Abort) and return if it - * encounters an instruction that reads from @param writer and also a different - * instruction. Here are some examples: - * - * writer = instruction 0; - * 0 MOV TEMP[0].xy, TEMP[1].xy - * 1 MOV TEMP[0].zw, TEMP[2].xy - * 2 MOV TEMP[3], TEMP[0] - * The Abort flag will be set on instruction 2, because it reads values written - * by instructions 0 and 1. - * - * writer = instruction 1; - * 0 IF TEMP[0].x - * 1 MOV TEMP[1], TEMP[2] - * 2 ELSE - * 3 MOV TEMP[1], TEMP[2] - * 4 ENDIF - * 5 MOV TEMP[3], TEMP[1] - * The Abort flag will be set on instruction 5, because it could read from the - * value written by either instruction 1 or 3, depending on the jump decision - * made at instruction 0. - * - * writer = instruction 0; - * 0 MOV TEMP[0], TEMP[1] - * 2 BGNLOOP - * 3 ADD TEMP[0], TEMP[0], none.1 - * 4 ENDLOOP - * The Abort flag will be set on instruction 3, because in the first iteration - * of the loop it reads the value written by instruction 0 and in all other - * iterations it reads the value written by instruction 3. - * - * @param read_cb This function will be called for for every instruction that - * has been determined to be a reader of writer. - * @param write_cb This function will be called for every instruction after - * writer. 
- */ -void rc_get_readers( - struct radeon_compiler * c, - struct rc_instruction * writer, - struct rc_reader_data * data, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb) -{ - struct get_readers_callback_data d; - - init_get_readers_callback_data(&d, data, c, read_normal_cb, - read_pair_cb, write_cb); - - rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); -} - -void rc_get_readers_sub( - struct radeon_compiler * c, - struct rc_instruction * writer, - struct rc_pair_sub_instruction * sub_writer, - struct rc_reader_data * data, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb) -{ - struct get_readers_callback_data d; - - init_get_readers_callback_data(&d, data, c, read_normal_cb, - read_pair_cb, write_cb); - - if (sub_writer->WriteMask) { - get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, - sub_writer->DestIndex, sub_writer->WriteMask); - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h deleted file mode 100644 index d8a6272..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * Copyright 2010 Tom Stellard - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef RADEON_DATAFLOW_H -#define RADEON_DATAFLOW_H - -#include "radeon_program_constants.h" - -struct radeon_compiler; -struct rc_instruction; -struct rc_swizzle_caps; -struct rc_src_register; -struct rc_pair_instruction_arg; -struct rc_pair_instruction_source; -struct rc_pair_sub_instruction; -struct rc_compiler; - - -/** - * Help analyze and modify the register accesses of instructions. - */ -/*@{*/ -typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan); -void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); -void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); - -typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask); -void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); -void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); - -typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst, - struct rc_src_register * src); -void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, - void * userdata); - -typedef void (*rc_pair_read_arg_fn)(void * userdata, - struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, - struct 
rc_pair_instruction_source * src); -void rc_pair_for_all_reads_arg(struct rc_instruction * inst, - rc_pair_read_arg_fn cb, void * userdata); - -typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, - rc_register_file * pfile, unsigned int * pindex); -void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); -/*@}*/ - -struct rc_reader { - struct rc_instruction * Inst; - unsigned int WriteMask; - union { - struct { - struct rc_src_register * Src; - } I; - struct { - struct rc_pair_instruction_arg * Arg; - struct rc_pair_instruction_source * Src; - } P; - } U; -}; - -struct rc_reader_data { - unsigned int Abort; - unsigned int AbortOnRead; - unsigned int AbortOnWrite; - unsigned int LoopDepth; - unsigned int InElse; - struct rc_instruction * Writer; - - unsigned int ReaderCount; - unsigned int ReadersReserved; - struct rc_reader * Readers; - - /* If this flag is enabled, rc_get_readers will exit as soon possbile - * after the Abort flag is set.*/ - unsigned int ExitOnAbort; - void * CbData; -}; - -void rc_get_readers( - struct radeon_compiler * c, - struct rc_instruction * writer, - struct rc_reader_data * data, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb); - -void rc_get_readers_sub( - struct radeon_compiler * c, - struct rc_instruction * writer, - struct rc_pair_sub_instruction * sub_writer, - struct rc_reader_data * data, - rc_read_src_fn read_normal_cb, - rc_pair_read_arg_fn read_pair_cb, - rc_read_write_mask_fn write_cb); -/** - * Compiler passes based on dataflow analysis. 
- */ -/*@{*/ -typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, - void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); -void rc_dataflow_deadcode(struct radeon_compiler * c, void *user); -void rc_dataflow_swizzles(struct radeon_compiler * c, void *user); -/*@}*/ - -void rc_optimize(struct radeon_compiler * c, void *user); - -#endif /* RADEON_DATAFLOW_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c deleted file mode 100644 index 678e147..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "radeon_dataflow.h" - -#include "radeon_compiler.h" - - -struct updatemask_state { - unsigned char Output[RC_REGISTER_MAX_INDEX]; - unsigned char Temporary[RC_REGISTER_MAX_INDEX]; - unsigned char Address; - unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; -}; - -struct instruction_state { - unsigned char WriteMask:4; - unsigned char WriteALUResult:1; - unsigned char SrcReg[3]; -}; - -struct loopinfo { - struct updatemask_state * Breaks; - unsigned int BreakCount; - unsigned int BreaksReserved; -}; - -struct branchinfo { - unsigned int HaveElse:1; - - struct updatemask_state StoreEndif; - struct updatemask_state StoreElse; -}; - -struct deadcode_state { - struct radeon_compiler * C; - struct instruction_state * Instructions; - - struct updatemask_state R; - - struct branchinfo * BranchStack; - unsigned int BranchStackSize; - unsigned int BranchStackReserved; - - struct loopinfo * LoopStack; - unsigned int LoopStackSize; - unsigned int LoopStackReserved; -}; - - -static void or_updatemasks( - struct updatemask_state * dst, - struct updatemask_state * a, - struct updatemask_state * b) -{ - for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { - dst->Output[i] = a->Output[i] | b->Output[i]; - dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; - } - - for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) - dst->Special[i] = a->Special[i] | b->Special[i]; - - dst->Address = a->Address | b->Address; -} - -static void push_break(struct deadcode_state *s) -{ - struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; - memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, - loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); - - memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); -} - -static void push_loop(struct deadcode_state * s) -{ - memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, - s->LoopStackSize, s->LoopStackReserved, 1); - memset(&s->LoopStack[s->LoopStackSize++], 0, 
sizeof(struct loopinfo)); -} - -static void push_branch(struct deadcode_state * s) -{ - struct branchinfo * branch; - - memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, - s->BranchStackSize, s->BranchStackReserved, 1); - - branch = &s->BranchStack[s->BranchStackSize++]; - branch->HaveElse = 0; - memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); -} - -static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) -{ - if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { - if (index >= RC_REGISTER_MAX_INDEX) { - rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); - return 0; - } - - if (file == RC_FILE_OUTPUT) - return &s->R.Output[index]; - else - return &s->R.Temporary[index]; - } else if (file == RC_FILE_ADDRESS) { - return &s->R.Address; - } else if (file == RC_FILE_SPECIAL) { - if (index >= RC_NUM_SPECIAL_REGISTERS) { - rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); - return 0; - } - - return &s->R.Special[index]; - } - - return 0; -} - -static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) -{ - unsigned char * pused = get_used_ptr(s, file, index); - if (pused) - *pused |= mask; -} - -static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - struct instruction_state * insts = &s->Instructions[inst->IP]; - unsigned int usedmask = 0; - unsigned int srcmasks[3]; - - if (opcode->HasDstReg) { - unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); - if (pused) { - usedmask = *pused & inst->U.I.DstReg.WriteMask; - *pused &= ~usedmask; - } - } - - insts->WriteMask |= usedmask; - - if (inst->U.I.WriteALUResult) { - unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); - if (pused && *pused) { - if 
(inst->U.I.WriteALUResult == RC_ALURESULT_X) - usedmask |= RC_MASK_X; - else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) - usedmask |= RC_MASK_W; - - *pused = 0; - insts->WriteALUResult = 1; - } - } - - rc_compute_sources_for_writemask(inst, usedmask, srcmasks); - - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - unsigned int refmask = 0; - unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; - insts->SrcReg[src] |= newsrcmask; - - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(newsrcmask, chan)) - refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); - } - - /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ - refmask &= RC_MASK_XYZW; - - if (!refmask) - continue; - - mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); - - if (inst->U.I.SrcReg[src].RelAddr) - mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); - } -} - -static void mark_output_use(void * data, unsigned int index, unsigned int mask) -{ - struct deadcode_state * s = data; - - mark_used(s, RC_FILE_OUTPUT, index, mask); -} - -void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) -{ - struct deadcode_state s; - unsigned int nr_instructions; - rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; - unsigned int ip; - - memset(&s, 0, sizeof(s)); - s.C = c; - - nr_instructions = rc_recompute_ips(c); - s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); - memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); - - dce(c, &s, &mark_output_use); - - for(struct rc_instruction * inst = c->Program.Instructions.Prev; - inst != &c->Program.Instructions; - inst = inst->Prev) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - - switch(opcode->Opcode){ - /* Mark all sources in the loop body as used before doing - * normal deadcode analysis. This is probably not optimal. 
- */ - case RC_OPCODE_ENDLOOP: - { - int endloops = 1; - struct rc_instruction *ptr; - for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ - opcode = rc_get_opcode_info(ptr->U.I.Opcode); - if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ - endloops--; - continue; - } - if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ - endloops++; - continue; - } - if(opcode->HasDstReg){ - int src = 0; - unsigned int srcmasks[3]; - rc_compute_sources_for_writemask(ptr, - ptr->U.I.DstReg.WriteMask, srcmasks); - for(src=0; src < opcode->NumSrcRegs; src++){ - mark_used(&s, - ptr->U.I.SrcReg[src].File, - ptr->U.I.SrcReg[src].Index, - srcmasks[src]); - } - } - } - push_loop(&s); - break; - } - case RC_OPCODE_BRK: - push_break(&s); - break; - case RC_OPCODE_BGNLOOP: - { - unsigned int i; - struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; - for(i = 0; i < loop->BreakCount; i++) { - or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); - } - break; - } - case RC_OPCODE_CONT: - break; - case RC_OPCODE_ENDIF: - push_branch(&s); - break; - default: - if (opcode->IsFlowControl && s.BranchStackSize) { - struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; - if (opcode->Opcode == RC_OPCODE_IF) { - or_updatemasks(&s.R, - &s.R, - branch->HaveElse ? 
&branch->StoreElse : &branch->StoreEndif); - - s.BranchStackSize--; - } else if (opcode->Opcode == RC_OPCODE_ELSE) { - if (branch->HaveElse) { - rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); - } else { - memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); - memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); - branch->HaveElse = 1; - } - } else { - rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); - } - } - } - - update_instruction(&s, inst); - } - - ip = 0; - for(struct rc_instruction * inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next, ++ip) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - int dead = 1; - unsigned int srcmasks[3]; - unsigned int usemask; - - if (!opcode->HasDstReg) { - dead = 0; - } else { - inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; - if (s.Instructions[ip].WriteMask) - dead = 0; - - if (s.Instructions[ip].WriteALUResult) - dead = 0; - else - inst->U.I.WriteALUResult = RC_ALURESULT_NONE; - } - - if (dead) { - struct rc_instruction * todelete = inst; - inst = inst->Prev; - rc_remove_instruction(todelete); - continue; - } - - usemask = s.Instructions[ip].WriteMask; - - if (inst->U.I.WriteALUResult == RC_ALURESULT_X) - usemask |= RC_MASK_X; - else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) - usemask |= RC_MASK_W; - - rc_compute_sources_for_writemask(inst, usemask, srcmasks); - - for(unsigned int src = 0; src < 3; ++src) { - for(unsigned int chan = 0; chan < 4; ++chan) { - if (!GET_BIT(srcmasks[src], chan)) - SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); - } - } - } - - rc_calculate_inputs_outputs(c); -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c deleted file mode 100644 index 133a9f7..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c +++ /dev/null @@ -1,103 +0,0 @@ 
-/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "radeon_dataflow.h" - -#include "radeon_compiler.h" -#include "radeon_swizzle.h" - - -static void rewrite_source(struct radeon_compiler * c, - struct rc_instruction * inst, unsigned src) -{ - struct rc_swizzle_split split; - unsigned int tempreg = rc_find_free_temporary(c); - unsigned int usemask; - - usemask = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) - usemask |= 1 << chan; - } - - c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); - - for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { - struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); - unsigned int phase_refmask; - unsigned int masked_negate; - - mov->U.I.Opcode = RC_OPCODE_MOV; - mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - mov->U.I.DstReg.Index = tempreg; - mov->U.I.DstReg.WriteMask = split.Phase[phase]; - mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; - mov->U.I.PreSub = inst->U.I.PreSub; - - phase_refmask = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (!GET_BIT(split.Phase[phase], chan)) - SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); - else - phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); - } - - phase_refmask &= RC_MASK_XYZW; - - masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; - if (masked_negate == 0) - mov->U.I.SrcReg[0].Negate = 0; - else if (masked_negate == split.Phase[phase]) - mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; - - } - - inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[src].Index = tempreg; - inst->U.I.SrcReg[src].Swizzle = 0; - inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; - inst->U.I.SrcReg[src].Abs = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { - SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, - GET_BIT(usemask, chan) ? 
chan : RC_SWIZZLE_UNUSED); - } -} - -void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) -{ - struct rc_instruction * inst; - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned int src; - - for(src = 0; src < opcode->NumSrcRegs; ++src) { - if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) - rewrite_source(c, inst, src); - } - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c deleted file mode 100644 index 7bede34..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "radeon_emulate_branches.h" - -#include - -#include "radeon_compiler.h" -#include "radeon_dataflow.h" - -#define VERBOSE 0 - -#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) - - -struct proxy_info { - unsigned int Proxied:1; - unsigned int Index:RC_REGISTER_INDEX_BITS; -}; - -struct register_proxies { - struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; -}; - -struct branch_info { - struct rc_instruction * If; - struct rc_instruction * Else; -}; - -struct emulate_branch_state { - struct radeon_compiler * C; - - struct branch_info * Branches; - unsigned int BranchCount; - unsigned int BranchReserved; -}; - - -static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) -{ - struct branch_info * branch; - struct rc_instruction * inst_mov; - - memory_pool_array_reserve(&s->C->Pool, struct branch_info, - s->Branches, s->BranchCount, s->BranchReserved, 1); - - DBG("%s\n", __FUNCTION__); - - branch = &s->Branches[s->BranchCount++]; - memset(branch, 0, sizeof(struct branch_info)); - branch->If = inst; - - /* Make a safety copy of the decision register, because we will need - * it at ENDIF time and it might be overwritten in both branches. 
*/ - inst_mov = rc_insert_new_instruction(s->C, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); - inst_mov->U.I.DstReg.WriteMask = RC_MASK_X; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; - inst->U.I.SrcReg[0].Swizzle = 0; - inst->U.I.SrcReg[0].Abs = 0; - inst->U.I.SrcReg[0].Negate = 0; -} - -static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) -{ - struct branch_info * branch; - - if (!s->BranchCount) { - rc_error(s->C, "Encountered ELSE outside of branches"); - return; - } - - DBG("%s\n", __FUNCTION__); - - branch = &s->Branches[s->BranchCount - 1]; - branch->Else = inst; -} - - -struct state_and_proxies { - struct emulate_branch_state * S; - struct register_proxies * Proxies; -}; - -static struct proxy_info * get_proxy_info(struct state_and_proxies * sap, - rc_register_file file, unsigned int index) -{ - if (file == RC_FILE_TEMPORARY) { - return &sap->Proxies->Temporary[index]; - } else { - return 0; - } -} - -static void scan_write(void * userdata, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int comp) -{ - struct state_and_proxies * sap = userdata; - struct proxy_info * proxy = get_proxy_info(sap, file, index); - - if (proxy && !proxy->Proxied) { - proxy->Proxied = 1; - proxy->Index = rc_find_free_temporary(sap->S->C); - } -} - -static void remap_proxy_function(void * userdata, struct rc_instruction * inst, - rc_register_file * pfile, unsigned int * pindex) -{ - struct state_and_proxies * sap = userdata; - struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex); - - if (proxy && proxy->Proxied) { - *pfile = RC_FILE_TEMPORARY; - *pindex = proxy->Index; - } -} - -/** - * Redirect all writes in the instruction range [begin, end) to proxy - * temporary 
registers. - */ -static void allocate_and_insert_proxies(struct emulate_branch_state * s, - struct register_proxies * proxies, - struct rc_instruction * begin, - struct rc_instruction * end) -{ - struct state_and_proxies sap; - - sap.S = s; - sap.Proxies = proxies; - - for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { - rc_for_all_writes_mask(inst, scan_write, &sap); - rc_remap_registers(inst, remap_proxy_function, &sap); - } - - for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { - if (proxies->Temporary[index].Proxied) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->U.I.SrcReg[0].Index = index; - } - } -} - - -static void inject_cmp(struct emulate_branch_state * s, - struct rc_instruction * inst_if, - struct rc_instruction * inst_endif, - rc_register_file file, unsigned int index, - struct proxy_info ifproxy, - struct proxy_info elseproxy) -{ - struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); - inst_cmp->U.I.Opcode = RC_OPCODE_CMP; - inst_cmp->U.I.DstReg.File = file; - inst_cmp->U.I.DstReg.Index = index; - inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; - inst_cmp->U.I.SrcReg[0].Abs = 1; - inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW; - inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; - inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? 
elseproxy.Index : index; -} - -static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) -{ - struct branch_info * branch; - struct register_proxies IfProxies; - struct register_proxies ElseProxies; - - if (!s->BranchCount) { - rc_error(s->C, "Encountered ENDIF outside of branches"); - return; - } - - DBG("%s\n", __FUNCTION__); - - branch = &s->Branches[s->BranchCount - 1]; - - memset(&IfProxies, 0, sizeof(IfProxies)); - memset(&ElseProxies, 0, sizeof(ElseProxies)); - - allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst); - - if (branch->Else) - allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst); - - /* Insert the CMP instructions at the end. */ - for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { - if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) { - inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index, - IfProxies.Temporary[index], ElseProxies.Temporary[index]); - } - } - - /* Remove all traces of the branch instructions */ - rc_remove_instruction(branch->If); - if (branch->Else) - rc_remove_instruction(branch->Else); - rc_remove_instruction(inst); - - s->BranchCount--; - - if (VERBOSE) { - DBG("Program after ENDIF handling:\n"); - rc_print_program(&s->C->Program); - } -} - - -struct remap_output_data { - unsigned int Output:RC_REGISTER_INDEX_BITS; - unsigned int Temporary:RC_REGISTER_INDEX_BITS; -}; - -static void remap_output_function(void * userdata, struct rc_instruction * inst, - rc_register_file * pfile, unsigned int * pindex) -{ - struct remap_output_data * data = userdata; - - if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) { - *pfile = RC_FILE_TEMPORARY; - *pindex = data->Temporary; - } -} - - -/** - * Output registers cannot be read from and so cannot be dealt with like - * temporary registers. 
- * - * We do the simplest thing: If an output registers is written within - * a branch, then *all* writes to this register are proxied to a - * temporary register, and a final MOV is appended to the end of - * the program. - */ -static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) -{ - const struct rc_opcode_info * opcode; - - if (!s->BranchCount) - return; - - opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if (!opcode->HasDstReg) - return; - - if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { - struct remap_output_data remap; - struct rc_instruction * inst_mov; - - remap.Output = inst->U.I.DstReg.Index; - remap.Temporary = rc_find_free_temporary(s->C); - - for(struct rc_instruction * inst = s->C->Program.Instructions.Next; - inst != &s->C->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, &remap_output_function, &remap); - } - - inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; - inst_mov->U.I.DstReg.Index = remap.Output; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->U.I.SrcReg[0].Index = remap.Temporary; - } -} - -/** - * Remove branch instructions; instead, execute both branches - * on different register sets and choose between their results - * using CMP instructions in place of the original ENDIF. 
- */ -void rc_emulate_branches(struct radeon_compiler *c, void *user) -{ - struct emulate_branch_state s; - struct rc_instruction * ptr; - - memset(&s, 0, sizeof(s)); - s.C = c; - - /* Untypical loop because we may remove the current instruction */ - ptr = c->Program.Instructions.Next; - while(ptr != &c->Program.Instructions) { - struct rc_instruction * inst = ptr; - ptr = ptr->Next; - - if (inst->Type == RC_INSTRUCTION_NORMAL) { - switch(inst->U.I.Opcode) { - case RC_OPCODE_IF: - handle_if(&s, inst); - break; - case RC_OPCODE_ELSE: - handle_else(&s, inst); - break; - case RC_OPCODE_ENDIF: - handle_endif(&s, inst); - break; - default: - fix_output_writes(&s, inst); - break; - } - } else { - rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__); - } - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h deleted file mode 100644 index 818ab84..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef RADEON_EMULATE_BRANCHES_H -#define RADEON_EMULATE_BRANCHES_H - -struct radeon_compiler; - -void rc_emulate_branches(struct radeon_compiler *c, void *user); - -#endif /* RADEON_EMULATE_BRANCHES_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c deleted file mode 100644 index 205eecd..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ /dev/null @@ -1,522 +0,0 @@ -/* - * Copyright 2010 Tom Stellard - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -/** - * \file - */ - -#include "radeon_emulate_loops.h" - -#include "radeon_compiler.h" -#include "radeon_dataflow.h" - -#define VERBOSE 0 - -#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) - -struct const_value { - struct radeon_compiler * C; - struct rc_src_register * Src; - float Value; - int HasValue; -}; - -struct count_inst { - struct radeon_compiler * C; - int Index; - rc_swizzle Swz; - float Amount; - int Unknown; -}; - -static float get_constant_value(struct radeon_compiler * c, - struct rc_src_register * src, - int chan) -{ - float base = 1.0f; - int swz = GET_SWZ(src->Swizzle, chan); - if(swz >= 4 || src->Index >= c->Program.Constants.Count ){ - rc_error(c, "get_constant_value: Can't find a value.\n"); - return 0.0f; - } - if(GET_BIT(src->Negate, chan)){ - base = -1.0f; - } - return base * - c->Program.Constants.Constants[src->Index].u.Immediate[swz]; -} - -static int src_reg_is_immediate(struct rc_src_register * src, - struct radeon_compiler * c) -{ - return src->File == RC_FILE_CONSTANT && - c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; -} - -static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, - struct loop_info * loop) -{ - unsigned int total_i = rc_recompute_ips(c); - unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; - /* +1 because the program already has one iteration of the loop. 
*/ - return 1 + ((c->max_alu_insts - total_i) / loop_i); -} - -static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, - unsigned int iterations) -{ - unsigned int i; - struct rc_instruction * ptr; - struct rc_instruction * first = loop->BeginLoop->Next; - struct rc_instruction * last = loop->EndLoop->Prev; - struct rc_instruction * append_to = last; - rc_remove_instruction(loop->BeginLoop); - rc_remove_instruction(loop->EndLoop); - for( i = 1; i < iterations; i++){ - for(ptr = first; ptr != last->Next; ptr = ptr->Next){ - struct rc_instruction *new = rc_alloc_instruction(c); - memcpy(new, ptr, sizeof(struct rc_instruction)); - rc_insert_instruction(append_to, new); - append_to = new; - } - } -} - - -static void update_const_value(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) -{ - struct const_value * value = data; - if(value->Src->File != file || - value->Src->Index != index || - !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ - return; - } - switch(inst->U.I.Opcode){ - case RC_OPCODE_MOV: - if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){ - return; - } - value->HasValue = 1; - value->Value = - get_constant_value(value->C, &inst->U.I.SrcReg[0], 0); - break; - } -} - -static void get_incr_amount(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) -{ - struct count_inst * count_inst = data; - int amnt_src_index; - const struct rc_opcode_info * opcode; - float amount; - - if(file != RC_FILE_TEMPORARY || - count_inst->Index != index || - (1 << GET_SWZ(count_inst->Swz,0) != mask)){ - return; - } - /* Find the index of the counter register. 
*/ - opcode = rc_get_opcode_info(inst->U.I.Opcode); - if(opcode->NumSrcRegs != 2){ - count_inst->Unknown = 1; - return; - } - if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[0].Index == count_inst->Index && - inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ - amnt_src_index = 1; - } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[1].Index == count_inst->Index && - inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ - amnt_src_index = 0; - } - else{ - count_inst->Unknown = 1; - return; - } - if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index], - count_inst->C)){ - amount = get_constant_value(count_inst->C, - &inst->U.I.SrcReg[amnt_src_index], 0); - } - else{ - count_inst->Unknown = 1 ; - return; - } - switch(inst->U.I.Opcode){ - case RC_OPCODE_ADD: - count_inst->Amount += amount; - break; - case RC_OPCODE_SUB: - if(amnt_src_index == 0){ - count_inst->Unknown = 0; - return; - } - count_inst->Amount -= amount; - break; - default: - count_inst->Unknown = 1; - return; - } -} - -/** - * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless - * of how many iterations they have. 
- */ -static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) -{ - int end_loops; - int iterations; - struct count_inst count_inst; - float limit_value; - struct rc_src_register * counter; - struct rc_src_register * limit; - struct const_value counter_value; - struct rc_instruction * inst; - - /* Find the counter and the upper limit */ - - if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){ - limit = &loop->Cond->U.I.SrcReg[0]; - counter = &loop->Cond->U.I.SrcReg[1]; - } - else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){ - limit = &loop->Cond->U.I.SrcReg[1]; - counter = &loop->Cond->U.I.SrcReg[0]; - } - else{ - DBG("No constant limit.\n"); - return 0; - } - - /* Find the initial value of the counter */ - counter_value.Src = counter; - counter_value.Value = 0.0f; - counter_value.HasValue = 0; - counter_value.C = c; - for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; - inst = inst->Next){ - rc_for_all_writes_mask(inst, update_const_value, &counter_value); - } - if(!counter_value.HasValue){ - DBG("Initial counter value cannot be determined.\n"); - return 0; - } - DBG("Initial counter value is %f\n", counter_value.Value); - /* Determine how the counter is modified each loop */ - count_inst.C = c; - count_inst.Index = counter->Index; - count_inst.Swz = counter->Swizzle; - count_inst.Amount = 0.0f; - count_inst.Unknown = 0; - end_loops = 1; - for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ - switch(inst->U.I.Opcode){ - /* XXX In the future we might want to try to unroll nested - * loops here.*/ - case RC_OPCODE_BGNLOOP: - end_loops++; - break; - case RC_OPCODE_ENDLOOP: - loop->EndLoop = inst; - end_loops--; - break; - case RC_OPCODE_BRK: - /* Don't unroll loops if it has a BRK instruction - * other one used when testing the main conditional - * of the loop. */ - - /* Make sure we haven't entered a nested loops. 
*/ - if(inst != loop->Brk && end_loops == 1) { - return 0; - } - break; - /* XXX Check if the counter is modified within an if statement. - */ - case RC_OPCODE_IF: - break; - default: - rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); - if(count_inst.Unknown){ - return 0; - } - break; - } - } - /* Infinite loop */ - if(count_inst.Amount == 0.0f){ - return 0; - } - DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); - /* Calculate the number of iterations of this loop. Keeping this - * simple, since we only support increment and decrement loops. - */ - limit_value = get_constant_value(c, limit, 0); - DBG("Limit is %f.\n", limit_value); - /* The iteration calculations are opposite of what you would expect. - * In a normal loop, if the condition is met, then loop continues, but - * with our loops, if the condition is met, the is exited. */ - switch(loop->Cond->U.I.Opcode){ - case RC_OPCODE_SGE: - case RC_OPCODE_SLE: - iterations = (int) ceilf((limit_value - counter_value.Value) / - count_inst.Amount); - break; - - case RC_OPCODE_SGT: - case RC_OPCODE_SLT: - iterations = (int) floorf((limit_value - counter_value.Value) / - count_inst.Amount) + 1; - break; - default: - return 0; - } - - if (c->max_alu_insts > 0 - && iterations > loop_max_possible_iterations(c, loop)) { - return 0; - } - - DBG("Loop will have %d iterations.\n", iterations); - - /* Prepare loop for unrolling */ - rc_remove_instruction(loop->Cond); - rc_remove_instruction(loop->If); - rc_remove_instruction(loop->Brk); - rc_remove_instruction(loop->EndIf); - - unroll_loop(c, loop, iterations); - loop->EndLoop = NULL; - return 1; -} - -/** - * @param c - * @param loop - * @param inst A pointer to a BGNLOOP instruction. - * @return 1 if all of the members of loop where set. - * @return 0 if there was an error and some members of loop are still NULL. 
- */ -static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, - struct rc_instruction * inst) -{ - struct rc_instruction * ptr; - - if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ - rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); - return 0; - } - - memset(loop, 0, sizeof(struct loop_info)); - - loop->BeginLoop = inst; - - for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { - - if (ptr == &c->Program.Instructions) { - rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", - __FUNCTION__); - return 0; - } - - switch(ptr->U.I.Opcode){ - case RC_OPCODE_BGNLOOP: - { - /* Nested loop, skip ahead to the end. */ - unsigned int loop_depth = 1; - for(ptr = ptr->Next; ptr != &c->Program.Instructions; - ptr = ptr->Next){ - if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { - loop_depth++; - } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { - if (!--loop_depth) { - break; - } - } - } - if (ptr == &c->Program.Instructions) { - rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", - __FUNCTION__); - return 0; - } - break; - } - case RC_OPCODE_BRK: - if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF - || ptr->Prev->U.I.Opcode != RC_OPCODE_IF - || loop->Brk){ - continue; - } - loop->Brk = ptr; - loop->If = ptr->Prev; - loop->EndIf = ptr->Next; - switch(loop->If->Prev->U.I.Opcode){ - case RC_OPCODE_SLT: - case RC_OPCODE_SGE: - case RC_OPCODE_SGT: - case RC_OPCODE_SLE: - case RC_OPCODE_SEQ: - case RC_OPCODE_SNE: - break; - default: - return 0; - } - loop->Cond = loop->If->Prev; - break; - - case RC_OPCODE_ENDLOOP: - loop->EndLoop = ptr; - break; - } - } - - if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf - && loop->Cond && loop->EndLoop) { - return 1; - } - return 0; -} - -/** - * This function prepares a loop to be unrolled by converting it into an if - * statement. 
Here is an outline of the conversion process: - * BGNLOOP; -> BGNLOOP; - * -> - * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; - * IF temp[0]; -> IF temp[0]; - * BRK; -> - * ENDIF; -> - * -> ENDIF; - * ENDLOOP; -> ENDLOOP - * - * @param inst A pointer to a BGNLOOP instruction. - * @return 1 for success, 0 for failure - */ -static int transform_loop(struct emulate_loop_state * s, - struct rc_instruction * inst) -{ - struct loop_info * loop; - - memory_pool_array_reserve(&s->C->Pool, struct loop_info, - s->Loops, s->LoopCount, s->LoopReserved, 1); - - loop = &s->Loops[s->LoopCount++]; - - if (!build_loop_info(s->C, loop, inst)) { - rc_error(s->C, "Failed to build loop info\n"); - return 0; - } - - if(try_unroll_loop(s->C, loop)){ - return 1; - } - - /* Reverse the conditional instruction */ - switch(loop->Cond->U.I.Opcode){ - case RC_OPCODE_SGE: - loop->Cond->U.I.Opcode = RC_OPCODE_SLT; - break; - case RC_OPCODE_SLT: - loop->Cond->U.I.Opcode = RC_OPCODE_SGE; - break; - case RC_OPCODE_SLE: - loop->Cond->U.I.Opcode = RC_OPCODE_SGT; - break; - case RC_OPCODE_SGT: - loop->Cond->U.I.Opcode = RC_OPCODE_SLE; - break; - case RC_OPCODE_SEQ: - loop->Cond->U.I.Opcode = RC_OPCODE_SNE; - break; - case RC_OPCODE_SNE: - loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; - break; - default: - rc_error(s->C, "loop->Cond is not a conditional.\n"); - return 0; - } - - /* Prepare the loop to be emulated */ - rc_remove_instruction(loop->Brk); - rc_remove_instruction(loop->EndIf); - rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); - return 1; -} - -void rc_transform_loops(struct radeon_compiler *c, void *user) -{ - struct emulate_loop_state * s = &c->loop_state; - struct rc_instruction * ptr; - - memset(s, 0, sizeof(struct emulate_loop_state)); - s->C = c; - for(ptr = s->C->Program.Instructions.Next; - ptr != &s->C->Program.Instructions; ptr = ptr->Next) { - if(ptr->Type == RC_INSTRUCTION_NORMAL && - ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ - if (!transform_loop(s, 
ptr)) - return; - } - } -} - -void rc_unroll_loops(struct radeon_compiler *c, void *user) -{ - struct rc_instruction * inst; - struct loop_info loop; - - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; inst = inst->Next) { - - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { - if (build_loop_info(c, &loop, inst)) { - try_unroll_loop(c, &loop); - } - } - } -} - -void rc_emulate_loops(struct radeon_compiler *c, void *user) -{ - struct emulate_loop_state * s = &c->loop_state; - int i; - /* Iterate backwards of the list of loops so that loops that nested - * loops are unrolled first. - */ - for( i = s->LoopCount - 1; i >= 0; i-- ){ - unsigned int iterations; - - if(!s->Loops[i].EndLoop){ - continue; - } - iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); - unroll_loop(s->C, &s->Loops[i], iterations); - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h deleted file mode 100644 index cd800c0..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ /dev/null @@ -1,32 +0,0 @@ - - -#ifndef RADEON_EMULATE_LOOPS_H -#define RADEON_EMULATE_LOOPS_H - -#define MAX_ITERATIONS 8 - -struct radeon_compiler; - -struct loop_info { - struct rc_instruction * BeginLoop; - struct rc_instruction * Cond; - struct rc_instruction * If; - struct rc_instruction * Brk; - struct rc_instruction * EndIf; - struct rc_instruction * EndLoop; -}; - -struct emulate_loop_state { - struct radeon_compiler * C; - struct loop_info * Loops; - unsigned int LoopCount; - unsigned int LoopReserved; -}; - -void rc_transform_loops(struct radeon_compiler *c, void *user); - -void rc_unroll_loops(struct radeon_compiler * c, void *user); - -void rc_emulate_loops(struct radeon_compiler * c, void *user); - -#endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.c b/src/mesa/drivers/dri/r300/compiler/radeon_list.c deleted file mode 100644 
index 811c908..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_list.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2011 Tom Stellard - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "radeon_list.h" - -#include -#include - -#include "memory_pool.h" - -struct rc_list * rc_list(struct memory_pool * pool, void * item) -{ - struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list)); - new->Item = item; - new->Next = NULL; - new->Prev = NULL; - - return new; -} - -void rc_list_add(struct rc_list ** list, struct rc_list * new_value) -{ - struct rc_list * temp; - - if (*list == NULL) { - *list = new_value; - return; - } - - for (temp = *list; temp->Next; temp = temp->Next); - - temp->Next = new_value; - new_value->Prev = temp; -} - -void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value) -{ - if (*list == rm_value) { - *list = rm_value->Next; - return; - } - - rm_value->Prev->Next = rm_value->Next; - if (rm_value->Next) { - rm_value->Next->Prev = rm_value->Prev; - } -} - -unsigned int rc_list_count(struct rc_list * list) -{ - unsigned int count = 0; - while (list) { - count++; - list = list->Next; - } - return count; -} - -void rc_list_print(struct rc_list * list) -{ - while(list) { - fprintf(stderr, "%p->", list->Item); - list = list->Next; - } - fprintf(stderr, "\n"); -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.h b/src/mesa/drivers/dri/r300/compiler/radeon_list.h deleted file mode 100644 index b3c8f89..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_list.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2011 Tom Stellard - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef RADEON_LIST_H -#define RADEON_LIST_H - -struct memory_pool; - -struct rc_list { - void * Item; - struct rc_list * Prev; - struct rc_list * Next; -}; - -struct rc_list * rc_list(struct memory_pool * pool, void * item); -void rc_list_add(struct rc_list ** list, struct rc_list * new_value); -void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value); -unsigned int rc_list_count(struct rc_list * list); -void rc_list_print(struct rc_list * list); - -#endif /* RADEON_LIST_H */ - diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c deleted file mode 100644 index afd78ad..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "radeon_opcodes.h" -#include "radeon_program.h" - -#include "radeon_program_constants.h" - -struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { - { - .Opcode = RC_OPCODE_NOP, - .Name = "NOP" - }, - { - .Opcode = RC_OPCODE_ILLEGAL_OPCODE, - .Name = "ILLEGAL OPCODE" - }, - { - .Opcode = RC_OPCODE_ABS, - .Name = "ABS", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_ADD, - .Name = "ADD", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_ARL, - .Name = "ARL", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_CEIL, - .Name = "CEIL", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_CLAMP, - .Name = "CLAMP", - .NumSrcRegs = 3, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_CMP, - .Name = "CMP", - .NumSrcRegs = 3, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_CND, - .Name = "CND", - .NumSrcRegs = 3, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_COS, - .Name = "COS", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_DDX, - .Name = "DDX", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_DDY, - .Name = "DDY", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_DP2, - .Name = "DP2", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_DP3, - .Name = "DP3", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_DP4, - .Name = "DP4", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_DPH, - .Name = "DPH", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_DST, - .Name = "DST", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_EX2, - .Name = "EX2", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_EXP, - 
.Name = "EXP", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_FLR, - .Name = "FLR", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_FRC, - .Name = "FRC", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_KIL, - .Name = "KIL", - .NumSrcRegs = 1 - }, - { - .Opcode = RC_OPCODE_LG2, - .Name = "LG2", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_LIT, - .Name = "LIT", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_LOG, - .Name = "LOG", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_LRP, - .Name = "LRP", - .NumSrcRegs = 3, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_MAD, - .Name = "MAD", - .NumSrcRegs = 3, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_MAX, - .Name = "MAX", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_MIN, - .Name = "MIN", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_MOV, - .Name = "MOV", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_MUL, - .Name = "MUL", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_POW, - .Name = "POW", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_RCP, - .Name = "RCP", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_RSQ, - .Name = "RSQ", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_SCS, - .Name = "SCS", - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_SEQ, - .Name = "SEQ", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SFL, - .Name = "SFL", - .NumSrcRegs = 0, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = 
RC_OPCODE_SGE, - .Name = "SGE", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SGT, - .Name = "SGT", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SIN, - .Name = "SIN", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsStandardScalar = 1 - }, - { - .Opcode = RC_OPCODE_SLE, - .Name = "SLE", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SLT, - .Name = "SLT", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SNE, - .Name = "SNE", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SSG, - .Name = "SSG", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SUB, - .Name = "SUB", - .NumSrcRegs = 2, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_SWZ, - .Name = "SWZ", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, - { - .Opcode = RC_OPCODE_XPD, - .Name = "XPD", - .NumSrcRegs = 2, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_TEX, - .Name = "TEX", - .HasTexture = 1, - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_TXB, - .Name = "TXB", - .HasTexture = 1, - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_TXD, - .Name = "TXD", - .HasTexture = 1, - .NumSrcRegs = 3, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_TXL, - .Name = "TXL", - .HasTexture = 1, - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_TXP, - .Name = "TXP", - .HasTexture = 1, - .NumSrcRegs = 1, - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_IF, - .Name = "IF", - .IsFlowControl = 1, - .NumSrcRegs = 1 - }, - { - .Opcode = RC_OPCODE_ELSE, - .Name = "ELSE", - .IsFlowControl = 1, - .NumSrcRegs = 0 - }, - { - .Opcode = RC_OPCODE_ENDIF, - .Name = "ENDIF", - .IsFlowControl = 1, - .NumSrcRegs = 0 - }, - { - .Opcode = RC_OPCODE_BGNLOOP, - .Name = "BGNLOOP", - .IsFlowControl = 1, - 
.NumSrcRegs = 0 - }, - { - .Opcode = RC_OPCODE_BRK, - .Name = "BRK", - .IsFlowControl = 1, - .NumSrcRegs = 0 - }, - { - .Opcode = RC_OPCODE_ENDLOOP, - .Name = "ENDLOOP", - .IsFlowControl = 1, - .NumSrcRegs = 0, - }, - { - .Opcode = RC_OPCODE_CONT, - .Name = "CONT", - .IsFlowControl = 1, - .NumSrcRegs = 0 - }, - { - .Opcode = RC_OPCODE_REPL_ALPHA, - .Name = "REPL_ALPHA", - .HasDstReg = 1 - }, - { - .Opcode = RC_OPCODE_BEGIN_TEX, - .Name = "BEGIN_TEX" - }, - { - .Opcode = RC_OPCODE_KILP, - .Name = "KILP", - } -}; - -void rc_compute_sources_for_writemask( - const struct rc_instruction *inst, - unsigned int writemask, - unsigned int *srcmasks) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - srcmasks[0] = 0; - srcmasks[1] = 0; - srcmasks[2] = 0; - - if (opcode->Opcode == RC_OPCODE_KIL) - srcmasks[0] |= RC_MASK_XYZW; - else if (opcode->Opcode == RC_OPCODE_IF) - srcmasks[0] |= RC_MASK_X; - - if (!writemask) - return; - - if (opcode->IsComponentwise) { - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) - srcmasks[src] |= writemask; - } else if (opcode->IsStandardScalar) { - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) - srcmasks[src] |= RC_MASK_X; - } else { - switch(opcode->Opcode) { - case RC_OPCODE_ARL: - srcmasks[0] |= RC_MASK_X; - break; - case RC_OPCODE_DP2: - srcmasks[0] |= RC_MASK_XY; - srcmasks[1] |= RC_MASK_XY; - break; - case RC_OPCODE_DP3: - case RC_OPCODE_XPD: - srcmasks[0] |= RC_MASK_XYZ; - srcmasks[1] |= RC_MASK_XYZ; - break; - case RC_OPCODE_DP4: - srcmasks[0] |= RC_MASK_XYZW; - srcmasks[1] |= RC_MASK_XYZW; - break; - case RC_OPCODE_DPH: - srcmasks[0] |= RC_MASK_XYZ; - srcmasks[1] |= RC_MASK_XYZW; - break; - case RC_OPCODE_TXB: - case RC_OPCODE_TXP: - case RC_OPCODE_TXL: - srcmasks[0] |= RC_MASK_W; - /* Fall through */ - case RC_OPCODE_TEX: - switch (inst->U.I.TexSrcTarget) { - case RC_TEXTURE_1D: - srcmasks[0] |= RC_MASK_X; - break; - case RC_TEXTURE_2D: - case RC_TEXTURE_RECT: - case 
RC_TEXTURE_1D_ARRAY: - srcmasks[0] |= RC_MASK_XY; - break; - case RC_TEXTURE_3D: - case RC_TEXTURE_CUBE: - case RC_TEXTURE_2D_ARRAY: - srcmasks[0] |= RC_MASK_XYZ; - break; - } - break; - case RC_OPCODE_TXD: - switch (inst->U.I.TexSrcTarget) { - case RC_TEXTURE_1D_ARRAY: - srcmasks[0] |= RC_MASK_Y; - /* Fall through. */ - case RC_TEXTURE_1D: - srcmasks[0] |= RC_MASK_X; - srcmasks[1] |= RC_MASK_X; - srcmasks[2] |= RC_MASK_X; - break; - case RC_TEXTURE_2D_ARRAY: - srcmasks[0] |= RC_MASK_Z; - /* Fall through. */ - case RC_TEXTURE_2D: - case RC_TEXTURE_RECT: - srcmasks[0] |= RC_MASK_XY; - srcmasks[1] |= RC_MASK_XY; - srcmasks[2] |= RC_MASK_XY; - break; - case RC_TEXTURE_3D: - case RC_TEXTURE_CUBE: - srcmasks[0] |= RC_MASK_XYZ; - srcmasks[1] |= RC_MASK_XYZ; - srcmasks[2] |= RC_MASK_XYZ; - break; - } - break; - case RC_OPCODE_DST: - srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; - srcmasks[1] |= RC_MASK_Y | RC_MASK_W; - break; - case RC_OPCODE_EXP: - case RC_OPCODE_LOG: - srcmasks[0] |= RC_MASK_XY; - break; - case RC_OPCODE_LIT: - srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; - break; - default: - break; - } - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h deleted file mode 100644 index b586882..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef RADEON_OPCODES_H -#define RADEON_OPCODES_H - -#include - -/** - * Opcodes understood by the Radeon compiler. - */ -typedef enum { - RC_OPCODE_NOP = 0, - RC_OPCODE_ILLEGAL_OPCODE, - - /** vec4 instruction: dst.c = abs(src0.c); */ - RC_OPCODE_ABS, - - /** vec4 instruction: dst.c = src0.c + src1.c; */ - RC_OPCODE_ADD, - - /** special instruction: load address register - * dst.x = floor(src.x), where dst must be an address register */ - RC_OPCODE_ARL, - - /** vec4 instruction: dst.c = ceil(src0.c) */ - RC_OPCODE_CEIL, - - /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ - RC_OPCODE_CLAMP, - - /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ - RC_OPCODE_CMP, - - /** vec4 instruction: dst.c = src2.c > 0.5 ? 
src0.c : src1.c */ - RC_OPCODE_CND, - - /** scalar instruction: dst = cos(src0.x) */ - RC_OPCODE_COS, - - /** special instruction: take vec4 partial derivative in X direction - * dst.c = d src0.c / dx */ - RC_OPCODE_DDX, - - /** special instruction: take vec4 partial derivative in Y direction - * dst.c = d src0.c / dy */ - RC_OPCODE_DDY, - - /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ - RC_OPCODE_DP2, - - /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ - RC_OPCODE_DP3, - - /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ - RC_OPCODE_DP4, - - /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ - RC_OPCODE_DPH, - - /** special instruction, see ARB_fragment_program */ - RC_OPCODE_DST, - - /** scalar instruction: dst = 2**src0.x */ - RC_OPCODE_EX2, - - /** special instruction, see ARB_vertex_program */ - RC_OPCODE_EXP, - - /** vec4 instruction: dst.c = floor(src0.c) */ - RC_OPCODE_FLR, - - /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ - RC_OPCODE_FRC, - - /** special instruction: stop execution if any component of src0 is negative */ - RC_OPCODE_KIL, - - /** scalar instruction: dst = log_2(src0.x) */ - RC_OPCODE_LG2, - - /** special instruction, see ARB_vertex_program */ - RC_OPCODE_LIT, - - /** special instruction, see ARB_vertex_program */ - RC_OPCODE_LOG, - - /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ - RC_OPCODE_LRP, - - /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ - RC_OPCODE_MAD, - - /** vec4 instruction: dst.c = max(src0.c, src1.c) */ - RC_OPCODE_MAX, - - /** vec4 instruction: dst.c = min(src0.c, src1.c) */ - RC_OPCODE_MIN, - - /** vec4 instruction: dst.c = src0.c */ - RC_OPCODE_MOV, - - /** vec4 instruction: dst.c = src0.c*src1.c */ - RC_OPCODE_MUL, - - /** scalar instruction: dst = src0.x ** src1.x */ - RC_OPCODE_POW, - - /** scalar instruction: dst = 1 / src0.x */ - RC_OPCODE_RCP, - 
- /** scalar instruction: dst = 1 / sqrt(src0.x) */ - RC_OPCODE_RSQ, - - /** special instruction, see ARB_fragment_program */ - RC_OPCODE_SCS, - - /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ - RC_OPCODE_SEQ, - - /** vec4 instruction: dst.c = 0.0 */ - RC_OPCODE_SFL, - - /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ - RC_OPCODE_SGE, - - /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ - RC_OPCODE_SGT, - - /** scalar instruction: dst = sin(src0.x) */ - RC_OPCODE_SIN, - - /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ - RC_OPCODE_SLE, - - /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ - RC_OPCODE_SLT, - - /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ - RC_OPCODE_SNE, - - /** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */ - RC_OPCODE_SSG, - - /** vec4 instruction: dst.c = src0.c - src1.c */ - RC_OPCODE_SUB, - - /** vec4 instruction: dst.c = src0.c */ - RC_OPCODE_SWZ, - - /** special instruction, see ARB_fragment_program */ - RC_OPCODE_XPD, - - RC_OPCODE_TEX, - RC_OPCODE_TXB, - RC_OPCODE_TXD, - RC_OPCODE_TXL, - RC_OPCODE_TXP, - - /** branch instruction: - * If src0.x != 0.0, continue with the next instruction; - * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. - */ - RC_OPCODE_IF, - - /** branch instruction: jump to matching RC_OPCODE_ENDIF */ - RC_OPCODE_ELSE, - - /** branch instruction: has no effect */ - RC_OPCODE_ENDIF, - - RC_OPCODE_BGNLOOP, - - RC_OPCODE_BRK, - - RC_OPCODE_ENDLOOP, - - RC_OPCODE_CONT, - - /** special instruction, used in R300-R500 fragment program pair instructions - * indicates that the result of the alpha operation shall be replicated - * across all other channels */ - RC_OPCODE_REPL_ALPHA, - - /** special instruction, used in R300-R500 fragment programs - * to indicate the start of a block of texture instructions that - * can run simultaneously. 
*/ - RC_OPCODE_BEGIN_TEX, - - /** Stop execution of the shader (GLSL discard) */ - RC_OPCODE_KILP, - - MAX_RC_OPCODE -} rc_opcode; - - -struct rc_opcode_info { - rc_opcode Opcode; - const char * Name; - - /** true if the instruction reads from a texture. - * - * \note This is false for the KIL instruction, even though KIL is - * a texture instruction from a hardware point of view. */ - unsigned int HasTexture:1; - - unsigned int NumSrcRegs:2; - unsigned int HasDstReg:1; - - /** true if this instruction affects control flow */ - unsigned int IsFlowControl:1; - - /** true if this is a vector instruction that operates on components in parallel - * without any cross-component interaction */ - unsigned int IsComponentwise:1; - - /** true if this instruction sources only its operands X components - * to compute one result which is smeared across all output channels */ - unsigned int IsStandardScalar:1; -}; - -extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; - -static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) -{ - assert((unsigned int)opcode < MAX_RC_OPCODE); - assert(rc_opcodes[opcode].Opcode == opcode); - - return &rc_opcodes[opcode]; -} - -struct rc_instruction; - -void rc_compute_sources_for_writemask( - const struct rc_instruction *inst, - unsigned int writemask, - unsigned int *srcmasks); - -#endif /* RADEON_OPCODES_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c deleted file mode 100644 index 39dcb21..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ /dev/null @@ -1,700 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * Copyright 2010 Tom Stellard - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "radeon_dataflow.h" - -#include "radeon_compiler.h" -#include "radeon_compiler_util.h" -#include "radeon_swizzle.h" - -struct src_clobbered_reads_cb_data { - rc_register_file File; - unsigned int Index; - unsigned int Mask; - struct rc_reader_data * ReaderData; -}; - -typedef void (*rc_presub_replace_fn)(struct rc_instruction *, - struct rc_instruction *, - unsigned int); - -static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) -{ - struct rc_src_register combine; - combine.File = inner.File; - combine.Index = inner.Index; - combine.RelAddr = inner.RelAddr; - if (outer.Abs) { - combine.Abs = 1; - combine.Negate = outer.Negate; - } else { - combine.Abs = inner.Abs; - combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); - combine.Negate ^= outer.Negate; - } - combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); - return combine; -} - -static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, - struct rc_src_register * src) -{ - rc_register_file file = src->File; - struct rc_reader_data * reader_data = data; - - if(!rc_inst_can_use_presub(inst, - reader_data->Writer->U.I.PreSub.Opcode, - rc_swizzle_to_writemask(src->Swizzle), - src, - &reader_data->Writer->U.I.PreSub.SrcReg[0], - &reader_data->Writer->U.I.PreSub.SrcReg[1])) { - reader_data->Abort = 1; - return; - } - - /* XXX This could probably be handled better. */ - if (file == RC_FILE_ADDRESS) { - reader_data->Abort = 1; - return; - } - - /* These instructions cannot read from the constants file. 
- * see radeonTransformTEX() - */ - if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && - reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && - (inst->U.I.Opcode == RC_OPCODE_TEX || - inst->U.I.Opcode == RC_OPCODE_TXB || - inst->U.I.Opcode == RC_OPCODE_TXP || - inst->U.I.Opcode == RC_OPCODE_TXD || - inst->U.I.Opcode == RC_OPCODE_TXL || - inst->U.I.Opcode == RC_OPCODE_KIL)){ - reader_data->Abort = 1; - return; - } -} - -static void src_clobbered_reads_cb( - void * data, - struct rc_instruction * inst, - struct rc_src_register * src) -{ - struct src_clobbered_reads_cb_data * sc_data = data; - - if (src->File == sc_data->File - && src->Index == sc_data->Index - && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { - - sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; - } - - if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { - sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; - } -} - -static void is_src_clobbered_scan_write( - void * data, - struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) -{ - struct src_clobbered_reads_cb_data sc_data; - struct rc_reader_data * reader_data = data; - sc_data.File = file; - sc_data.Index = index; - sc_data.Mask = mask; - sc_data.ReaderData = reader_data; - rc_for_all_reads_src(reader_data->Writer, - src_clobbered_reads_cb, &sc_data); -} - -static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) -{ - struct rc_reader_data reader_data; - unsigned int i; - - if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || - inst_mov->U.I.WriteALUResult || - inst_mov->U.I.SaturateMode) - return; - - /* Get a list of all the readers of this MOV instruction. */ - reader_data.ExitOnAbort = 1; - rc_get_readers(c, inst_mov, &reader_data, - copy_propagate_scan_read, NULL, - is_src_clobbered_scan_write); - - if (reader_data.Abort || reader_data.ReaderCount == 0) - return; - - /* Propagate the MOV instruction. 
*/ - for (i = 0; i < reader_data.ReaderCount; i++) { - struct rc_instruction * inst = reader_data.Readers[i].Inst; - *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); - - if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) - inst->U.I.PreSub = inst_mov->U.I.PreSub; - } - - /* Finally, remove the original MOV instruction */ - rc_remove_instruction(inst_mov); -} - -/** - * Check if a source register is actually always the same - * swizzle constant. - */ -static int is_src_uniform_constant(struct rc_src_register src, - rc_swizzle * pswz, unsigned int * pnegate) -{ - int have_used = 0; - - if (src.File != RC_FILE_NONE) { - *pswz = 0; - return 0; - } - - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(src.Swizzle, chan); - if (swz < 4) { - *pswz = 0; - return 0; - } - if (swz == RC_SWIZZLE_UNUSED) - continue; - - if (!have_used) { - *pswz = swz; - *pnegate = GET_BIT(src.Negate, chan); - have_used = 1; - } else { - if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { - *pswz = 0; - return 0; - } - } - } - - return 1; -} - -static void constant_folding_mad(struct rc_instruction * inst) -{ - rc_swizzle swz = 0; - unsigned int negate= 0; - - if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { - if (swz == RC_SWIZZLE_ZERO) { - inst->U.I.Opcode = RC_OPCODE_MUL; - return; - } - } - - if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { - if (swz == RC_SWIZZLE_ONE) { - inst->U.I.Opcode = RC_OPCODE_ADD; - if (negate) - inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; - inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; - return; - } else if (swz == RC_SWIZZLE_ZERO) { - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; - return; - } - } - - if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { - if (swz == RC_SWIZZLE_ONE) { - inst->U.I.Opcode = RC_OPCODE_ADD; - if (negate) - inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; - 
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; - return; - } else if (swz == RC_SWIZZLE_ZERO) { - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; - return; - } - } -} - -static void constant_folding_mul(struct rc_instruction * inst) -{ - rc_swizzle swz = 0; - unsigned int negate = 0; - - if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { - if (swz == RC_SWIZZLE_ONE) { - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; - if (negate) - inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; - return; - } else if (swz == RC_SWIZZLE_ZERO) { - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; - return; - } - } - - if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { - if (swz == RC_SWIZZLE_ONE) { - inst->U.I.Opcode = RC_OPCODE_MOV; - if (negate) - inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; - return; - } else if (swz == RC_SWIZZLE_ZERO) { - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; - return; - } - } -} - -static void constant_folding_add(struct rc_instruction * inst) -{ - rc_swizzle swz = 0; - unsigned int negate = 0; - - if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { - if (swz == RC_SWIZZLE_ZERO) { - inst->U.I.Opcode = RC_OPCODE_MOV; - inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; - return; - } - } - - if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { - if (swz == RC_SWIZZLE_ZERO) { - inst->U.I.Opcode = RC_OPCODE_MOV; - return; - } - } -} - -/** - * Replace 0.0, 1.0 and 0.5 immediate constants by their - * respective swizzles. 
Simplify instructions like ADD dst, src, 0; - */ -static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned int i; - - /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - struct rc_constant * constant; - struct rc_src_register newsrc; - int have_real_reference; - unsigned int chan; - - /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ - for (chan = 0; chan < 4; ++chan) - if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) - break; - if (chan == 4) { - inst->U.I.SrcReg[src].File = RC_FILE_NONE; - continue; - } - - /* Convert immediates to swizzles. */ - if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || - inst->U.I.SrcReg[src].RelAddr || - inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) - continue; - - constant = - &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; - - if (constant->Type != RC_CONSTANT_IMMEDIATE) - continue; - - newsrc = inst->U.I.SrcReg[src]; - have_real_reference = 0; - for (chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); - unsigned int newswz; - float imm; - float baseimm; - - if (swz >= 4) - continue; - - imm = constant->u.Immediate[swz]; - baseimm = imm; - if (imm < 0.0) - baseimm = -baseimm; - - if (baseimm == 0.0) { - newswz = RC_SWIZZLE_ZERO; - } else if (baseimm == 1.0) { - newswz = RC_SWIZZLE_ONE; - } else if (baseimm == 0.5 && c->has_half_swizzles) { - newswz = RC_SWIZZLE_HALF; - } else { - have_real_reference = 1; - continue; - } - - SET_SWZ(newsrc.Swizzle, chan, newswz); - if (imm < 0.0 && !newsrc.Abs) - newsrc.Negate ^= 1 << chan; - } - - if (!have_real_reference) { - newsrc.File = RC_FILE_NONE; - newsrc.Index = 0; - } - - /* don't make the swizzle worse */ - if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && - 
c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) - continue; - - inst->U.I.SrcReg[src] = newsrc; - } - - /* Simplify instructions based on constants */ - if (inst->U.I.Opcode == RC_OPCODE_MAD) - constant_folding_mad(inst); - - /* note: MAD can simplify to MUL or ADD */ - if (inst->U.I.Opcode == RC_OPCODE_MUL) - constant_folding_mul(inst); - else if (inst->U.I.Opcode == RC_OPCODE_ADD) - constant_folding_add(inst); - - /* In case this instruction has been converted, make sure all of the - * registers that are no longer used are empty. */ - opcode = rc_get_opcode_info(inst->U.I.Opcode); - for(i = opcode->NumSrcRegs; i < 3; i++) { - memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); - } -} - -/** - * If src and dst use the same register, this function returns a writemask that - * indicates wich components are read by src. Otherwise zero is returned. - */ -static unsigned int src_reads_dst_mask(struct rc_src_register src, - struct rc_dst_register dst) -{ - if (dst.File != src.File || dst.Index != src.Index) { - return 0; - } - return rc_swizzle_to_writemask(src.Swizzle); -} - -/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) - * in any of its channels. Return 0 otherwise. 
*/ -static int src_has_const_swz(struct rc_src_register src) { - int chan; - for(chan = 0; chan < 4; chan++) { - unsigned int swz = GET_SWZ(src.Swizzle, chan); - if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF - || swz == RC_SWIZZLE_ONE) { - return 1; - } - } - return 0; -} - -static void presub_scan_read( - void * data, - struct rc_instruction * inst, - struct rc_src_register * src) -{ - struct rc_reader_data * reader_data = data; - rc_presubtract_op * presub_opcode = reader_data->CbData; - - if (!rc_inst_can_use_presub(inst, *presub_opcode, - reader_data->Writer->U.I.DstReg.WriteMask, - src, - &reader_data->Writer->U.I.SrcReg[0], - &reader_data->Writer->U.I.SrcReg[1])) { - reader_data->Abort = 1; - return; - } -} - -static int presub_helper( - struct radeon_compiler * c, - struct rc_instruction * inst_add, - rc_presubtract_op presub_opcode, - rc_presub_replace_fn presub_replace) -{ - struct rc_reader_data reader_data; - unsigned int i; - rc_presubtract_op cb_op = presub_opcode; - - reader_data.CbData = &cb_op; - reader_data.ExitOnAbort = 1; - rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, - is_src_clobbered_scan_write); - - if (reader_data.Abort || reader_data.ReaderCount == 0) - return 0; - - for(i = 0; i < reader_data.ReaderCount; i++) { - unsigned int src_index; - struct rc_reader reader = reader_data.Readers[i]; - const struct rc_opcode_info * info = - rc_get_opcode_info(reader.Inst->U.I.Opcode); - - for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { - if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) - presub_replace(inst_add, reader.Inst, src_index); - } - } - return 1; -} - -/* This function assumes that inst_add->U.I.SrcReg[0] and - * inst_add->U.I.SrcReg[1] aren't both negative. 
*/ -static void presub_replace_add( - struct rc_instruction * inst_add, - struct rc_instruction * inst_reader, - unsigned int src_index) -{ - rc_presubtract_op presub_opcode; - if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) - presub_opcode = RC_PRESUB_SUB; - else - presub_opcode = RC_PRESUB_ADD; - - if (inst_add->U.I.SrcReg[1].Negate) { - inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; - inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; - } else { - inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; - inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; - } - inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; - inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; - inst_reader->U.I.PreSub.Opcode = presub_opcode; - inst_reader->U.I.SrcReg[src_index] = - chain_srcregs(inst_reader->U.I.SrcReg[src_index], - inst_reader->U.I.PreSub.SrcReg[0]); - inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; - inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; -} - -static int is_presub_candidate( - struct radeon_compiler * c, - struct rc_instruction * inst) -{ - const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - unsigned int i; - unsigned int is_constant[2] = {0, 0}; - - assert(inst->U.I.Opcode == RC_OPCODE_ADD); - - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE - || inst->U.I.SaturateMode - || inst->U.I.WriteALUResult) { - return 0; - } - - /* If both sources use a constant swizzle, then we can't convert it to - * a presubtract operation. In fact for the ADD and SUB presubtract - * operations neither source can contain a constant swizzle. This - * specific case is checked in peephole_add_presub_add() when - * we make sure the swizzles for both sources are equal, so we - * don't need to worry about it here. 
*/ - for (i = 0; i < 2; i++) { - int chan; - for (chan = 0; chan < 4; chan++) { - rc_swizzle swz = - get_swz(inst->U.I.SrcReg[i].Swizzle, chan); - if (swz == RC_SWIZZLE_ONE - || swz == RC_SWIZZLE_ZERO - || swz == RC_SWIZZLE_HALF) { - is_constant[i] = 1; - } - } - } - if (is_constant[0] && is_constant[1]) - return 0; - - for(i = 0; i < info->NumSrcRegs; i++) { - struct rc_src_register src = inst->U.I.SrcReg[i]; - if (src_reads_dst_mask(src, inst->U.I.DstReg)) - return 0; - - src.File = RC_FILE_PRESUB; - if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) - return 0; - } - return 1; -} - -static int peephole_add_presub_add( - struct radeon_compiler * c, - struct rc_instruction * inst_add) -{ - unsigned dstmask = inst_add->U.I.DstReg.WriteMask; - unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; - unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; - - if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) - return 0; - - /* src0 and src1 can't have absolute values */ - if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) - return 0; - - /* presub_replace_add() assumes only one is negative */ - if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) - return 0; - - /* if src0 is negative, at least all bits of dstmask have to be set */ - if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) - return 0; - - /* if src1 is negative, at least all bits of dstmask have to be set */ - if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) - return 0; - - if (!is_presub_candidate(c, inst_add)) - return 0; - - if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { - rc_remove_instruction(inst_add); - return 1; - } - return 0; -} - -static void presub_replace_inv( - struct rc_instruction * inst_add, - struct rc_instruction * inst_reader, - unsigned int src_index) -{ - /* We must be careful not to modify inst_add, since it - * is possible it will remain part of the program.*/ - 
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; - inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; - inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; - inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], - inst_reader->U.I.PreSub.SrcReg[0]); - - inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; - inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; -} - -/** - * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] - * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source - * of the add instruction must have the constatnt 1 swizzle. This function - * does not check const registers to see if their value is 1.0, so it should - * be called after the constant_folding optimization. - * @return - * 0 if the ADD instruction is still part of the program. - * 1 if the ADD instruction is no longer part of the program. - */ -static int peephole_add_presub_inv( - struct radeon_compiler * c, - struct rc_instruction * inst_add) -{ - unsigned int i, swz; - - if (!is_presub_candidate(c, inst_add)) - return 0; - - /* Check if src0 is 1. */ - /* XXX It would be nice to use is_src_uniform_constant here, but that - * function only works if the register's file is RC_FILE_NONE */ - for(i = 0; i < 4; i++ ) { - swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); - if(((1 << i) & inst_add->U.I.DstReg.WriteMask) - && swz != RC_SWIZZLE_ONE) { - return 0; - } - } - - /* Check src1. */ - if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != - inst_add->U.I.DstReg.WriteMask - || inst_add->U.I.SrcReg[1].Abs - || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY - && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) - || src_has_const_swz(inst_add->U.I.SrcReg[1])) { - - return 0; - } - - if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { - rc_remove_instruction(inst_add); - return 1; - } - return 0; -} - -/** - * @return - * 0 if inst is still part of the program. 
- * 1 if inst is no longer part of the program. - */ -static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) -{ - switch(inst->U.I.Opcode){ - case RC_OPCODE_ADD: - if (c->has_presub) { - if(peephole_add_presub_inv(c, inst)) - return 1; - if(peephole_add_presub_add(c, inst)) - return 1; - } - break; - default: - break; - } - return 0; -} - -void rc_optimize(struct radeon_compiler * c, void *user) -{ - struct rc_instruction * inst = c->Program.Instructions.Next; - while(inst != &c->Program.Instructions) { - struct rc_instruction * cur = inst; - inst = inst->Next; - - constant_folding(c, cur); - - if(peephole(c, cur)) - continue; - - if (cur->U.I.Opcode == RC_OPCODE_MOV) { - copy_propagate(c, cur); - /* cur may no longer be part of the program */ - } - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c deleted file mode 100644 index 1e9a2c0..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c +++ /dev/null @@ -1,62 +0,0 @@ - -#include "radeon_compiler.h" -#include "radeon_compiler_util.h" -#include "radeon_opcodes.h" -#include "radeon_program_pair.h" - -static void mark_used_presub(struct rc_pair_sub_instruction * sub) -{ - if (sub->Src[RC_PAIR_PRESUB_SRC].Used) { - unsigned int presub_reg_count = rc_presubtract_src_reg_count( - sub->Src[RC_PAIR_PRESUB_SRC].Index); - unsigned int i; - for (i = 0; i < presub_reg_count; i++) { - sub->Src[i].Used = 1; - } - } -} - -static void mark_used( - struct rc_instruction * inst, - struct rc_pair_sub_instruction * sub) -{ - unsigned int i; - const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); - for (i = 0; i < info->NumSrcRegs; i++) { - unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle); - if (src_type & RC_SOURCE_RGB) { - inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1; - } - - if (src_type & RC_SOURCE_ALPHA) { - inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1; - 
} - } -} - -/** - * This pass finds sources that are not used by their instruction and marks - * them as unused. - */ -void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user) -{ - struct rc_instruction * inst; - for (inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - unsigned int i; - if (inst->Type == RC_INSTRUCTION_NORMAL) - continue; - - /* Mark all sources as unused */ - for (i = 0; i < 4; i++) { - inst->U.P.RGB.Src[i].Used = 0; - inst->U.P.Alpha.Src[i].Used = 0; - } - mark_used(inst, &inst->U.P.RGB); - mark_used(inst, &inst->U.P.Alpha); - - mark_used_presub(&inst->U.P.RGB); - mark_used_presub(&inst->U.P.Alpha); - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c deleted file mode 100644 index 49983d6..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ /dev/null @@ -1,706 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * Copyright 2011 Tom Stellard - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program_pair.h" - -#include - -#include "main/glheader.h" -#include "program/register_allocate.h" -#include "ralloc.h" - -#include "r300_fragprog_swizzle.h" -#include "radeon_compiler.h" -#include "radeon_compiler_util.h" -#include "radeon_dataflow.h" -#include "radeon_list.h" -#include "radeon_variable.h" - -#define VERBOSE 0 - -#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) - - - -struct register_info { - struct live_intervals Live[4]; - - unsigned int Used:1; - unsigned int Allocated:1; - unsigned int File:3; - unsigned int Index:RC_REGISTER_INDEX_BITS; - unsigned int Writemask; -}; - -struct regalloc_state { - struct radeon_compiler * C; - - struct register_info * Input; - unsigned int NumInputs; - - struct register_info * Temporary; - unsigned int NumTemporaries; - - unsigned int Simple; - int LoopEnd; -}; - -enum rc_reg_class { - RC_REG_CLASS_SINGLE, - RC_REG_CLASS_DOUBLE, - RC_REG_CLASS_TRIPLE, - RC_REG_CLASS_ALPHA, - RC_REG_CLASS_SINGLE_PLUS_ALPHA, - RC_REG_CLASS_DOUBLE_PLUS_ALPHA, - RC_REG_CLASS_TRIPLE_PLUS_ALPHA, - RC_REG_CLASS_X, - RC_REG_CLASS_Y, - RC_REG_CLASS_Z, - RC_REG_CLASS_XY, - RC_REG_CLASS_YZ, - RC_REG_CLASS_XZ, - RC_REG_CLASS_XW, - RC_REG_CLASS_YW, - RC_REG_CLASS_ZW, - RC_REG_CLASS_XYW, - RC_REG_CLASS_YZW, - RC_REG_CLASS_XZW, - RC_REG_CLASS_COUNT -}; - -struct rc_class { - enum rc_reg_class Class; - - unsigned int WritemaskCount; - - /** This is 1 if this class is being used by the register allocator - * and 0 otherwise */ - unsigned int Used; - - /** This is the ID number assigned to this class by ra. 
*/ - unsigned int Id; - - /** List of writemasks that belong to this class */ - unsigned int Writemasks[3]; - - -}; - -static void print_live_intervals(struct live_intervals * src) -{ - if (!src || !src->Used) { - DBG("(null)"); - return; - } - - DBG("(%i,%i)", src->Start, src->End); -} - -static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) -{ - if (VERBOSE) { - DBG("overlap_live_intervals: "); - print_live_intervals(a); - DBG(" to "); - print_live_intervals(b); - DBG("\n"); - } - - if (!a->Used || !b->Used) { - DBG(" unused interval\n"); - return 0; - } - - if (a->Start > b->Start) { - if (a->Start < b->End) { - DBG(" overlap\n"); - return 1; - } - } else if (b->Start > a->Start) { - if (b->Start < a->End) { - DBG(" overlap\n"); - return 1; - } - } else { /* a->Start == b->Start */ - if (a->Start != a->End && b->Start != b->End) { - DBG(" overlap\n"); - return 1; - } - } - - DBG(" no overlap\n"); - - return 0; -} - -static void scan_read_callback(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) -{ - struct regalloc_state * s = data; - struct register_info * reg; - unsigned int i; - - if (file != RC_FILE_INPUT) - return; - - s->Input[index].Used = 1; - reg = &s->Input[index]; - - for (i = 0; i < 4; i++) { - if (!((mask >> i) & 0x1)) { - continue; - } - reg->Live[i].Used = 1; - reg->Live[i].Start = 0; - reg->Live[i].End = - s->LoopEnd > inst->IP ? 
s->LoopEnd : inst->IP; - } -} - -static void remap_register(void * data, struct rc_instruction * inst, - rc_register_file * file, unsigned int * index) -{ - struct regalloc_state * s = data; - const struct register_info * reg; - - if (*file == RC_FILE_TEMPORARY && s->Simple) - reg = &s->Temporary[*index]; - else if (*file == RC_FILE_INPUT) - reg = &s->Input[*index]; - else - return; - - if (reg->Allocated) { - *index = reg->Index; - } -} - -static void alloc_input_simple(void * data, unsigned int input, - unsigned int hwreg) -{ - struct regalloc_state * s = data; - - if (input >= s->NumInputs) - return; - - s->Input[input].Allocated = 1; - s->Input[input].File = RC_FILE_TEMPORARY; - s->Input[input].Index = hwreg; -} - -/* This functions offsets the temporary register indices by the number - * of input registers, because input registers are actually temporaries and - * should not occupy the same space. - * - * This pass is supposed to be used to maintain correct allocation of inputs - * if the standard register allocation is disabled. 
*/ -static void do_regalloc_inputs_only(struct regalloc_state * s) -{ - for (unsigned i = 0; i < s->NumTemporaries; i++) { - s->Temporary[i].Allocated = 1; - s->Temporary[i].File = RC_FILE_TEMPORARY; - s->Temporary[i].Index = i + s->NumInputs; - } -} - -static unsigned int is_derivative(rc_opcode op) -{ - return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); -} - -static int find_class( - struct rc_class * classes, - unsigned int writemask, - unsigned int max_writemask_count) -{ - unsigned int i; - for (i = 0; i < RC_REG_CLASS_COUNT; i++) { - unsigned int j; - if (classes[i].WritemaskCount > max_writemask_count) { - continue; - } - for (j = 0; j < 3; j++) { - if (classes[i].Writemasks[j] == writemask) { - return i; - } - } - } - return -1; -} - -static enum rc_reg_class variable_get_class( - struct rc_variable * variable, - struct rc_class * classes) -{ - unsigned int i; - unsigned int can_change_writemask= 1; - unsigned int writemask = rc_variable_writemask_sum(variable); - struct rc_list * readers = rc_variable_readers_union(variable); - int class_index; - - if (!variable->C->is_r500) { - struct rc_class c; - /* The assumption here is that if an instruction has type - * RC_INSTRUCTION_NORMAL then it is a TEX instruction. - * r300 and r400 can't swizzle the result of a TEX lookup. */ - if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) { - writemask = RC_MASK_XYZW; - } - - /* Check if it is possible to do swizzle packing for r300/r400 - * without creating non-native swizzles. 
*/ - class_index = find_class(classes, writemask, 3); - if (class_index < 0) { - goto error; - } - c = classes[class_index]; - for (i = 0; i < c.WritemaskCount; i++) { - int j; - unsigned int conversion_swizzle = - rc_make_conversion_swizzle( - writemask, c.Writemasks[i]); - for (j = 0; j < variable->ReaderCount; j++) { - unsigned int old_swizzle; - unsigned int new_swizzle; - struct rc_reader r = variable->Readers[j]; - if (r.Inst->Type == RC_INSTRUCTION_PAIR ) { - old_swizzle = r.U.P.Arg->Swizzle; - } else { - old_swizzle = r.U.I.Src->Swizzle; - } - new_swizzle = rc_adjust_channels( - old_swizzle, conversion_swizzle); - if (!r300_swizzle_is_native_basic(new_swizzle)) { - can_change_writemask = 0; - break; - } - } - if (!can_change_writemask) { - break; - } - } - } - - if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { - /* DDX/DDY seem to always fail when their writemasks are - * changed.*/ - if (is_derivative(variable->Inst->U.P.RGB.Opcode) - || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { - can_change_writemask = 0; - } - } - for ( ; readers; readers = readers->Next) { - struct rc_reader * r = readers->Item; - if (r->Inst->Type == RC_INSTRUCTION_PAIR) { - if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { - can_change_writemask = 0; - break; - } - /* DDX/DDY also fail when their swizzles are changed. */ - if (is_derivative(r->Inst->U.P.RGB.Opcode) - || is_derivative(r->Inst->U.P.Alpha.Opcode)) { - can_change_writemask = 0; - break; - } - } - } - - class_index = find_class(classes, writemask, - can_change_writemask ? 
3 : 1); - if (class_index > -1) { - return classes[class_index].Class; - } else { -error: - rc_error(variable->C, - "Could not find class for index=%u mask=%u\n", - variable->Dst.Index, writemask); - return 0; - } -} - -static unsigned int overlap_live_intervals_array( - struct live_intervals * a, - struct live_intervals * b) -{ - unsigned int a_chan, b_chan; - for (a_chan = 0; a_chan < 4; a_chan++) { - for (b_chan = 0; b_chan < 4; b_chan++) { - if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { - return 1; - } - } - } - return 0; -} - -static unsigned int reg_get_index(int reg) -{ - return reg / RC_MASK_XYZW; -} - -static unsigned int reg_get_writemask(int reg) -{ - return (reg % RC_MASK_XYZW) + 1; -} - -static int get_reg_id(unsigned int index, unsigned int writemask) -{ - assert(writemask); - if (writemask == 0) { - return 0; - } - return (index * RC_MASK_XYZW) + (writemask - 1); -} - -#if VERBOSE -static void print_reg(int reg) -{ - unsigned int index = reg_get_index(reg); - unsigned int mask = reg_get_writemask(reg); - fprintf(stderr, "Temp[%u].%c%c%c%c", index, - mask & RC_MASK_X ? 'x' : '_', - mask & RC_MASK_Y ? 'y' : '_', - mask & RC_MASK_Z ? 'z' : '_', - mask & RC_MASK_W ? 
'w' : '_'); -} -#endif - -static void add_register_conflicts( - struct ra_regs * regs, - unsigned int max_temp_regs) -{ - unsigned int index, a_mask, b_mask; - for (index = 0; index < max_temp_regs; index++) { - for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { - for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; - b_mask++) { - if (a_mask & b_mask) { - ra_add_reg_conflict(regs, - get_reg_id(index, a_mask), - get_reg_id(index, b_mask)); - } - } - } - } -} - -static void do_advanced_regalloc(struct regalloc_state * s) -{ - struct rc_class rc_class_list [] = { - {RC_REG_CLASS_SINGLE, 3, 0, 0, - {RC_MASK_X, - RC_MASK_Y, - RC_MASK_Z}}, - {RC_REG_CLASS_DOUBLE, 3, 0, 0, - {RC_MASK_X | RC_MASK_Y, - RC_MASK_X | RC_MASK_Z, - RC_MASK_Y | RC_MASK_Z}}, - {RC_REG_CLASS_TRIPLE, 1, 0, 0, - {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_ALPHA, 1, 0, 0, - {RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, - {RC_MASK_X | RC_MASK_W, - RC_MASK_Y | RC_MASK_W, - RC_MASK_Z | RC_MASK_W}}, - {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, - {RC_MASK_X | RC_MASK_Y | RC_MASK_W, - RC_MASK_X | RC_MASK_Z | RC_MASK_W, - RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, - {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, - {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_X, 1, 0, 0, - {RC_MASK_X, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_Y, 1, 0, 0, - {RC_MASK_Y, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_Z, 1, 0, 0, - {RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XY, 1, 0, 0, - {RC_MASK_X | RC_MASK_Y, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_YZ, 1, 0, 0, - {RC_MASK_Y | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XZ, 1, 0, 0, - {RC_MASK_X | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XW, 1, 0, 0, - {RC_MASK_X | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_YW, 1, 0, 0, - {RC_MASK_Y | RC_MASK_W, - RC_MASK_NONE, 
- RC_MASK_NONE}}, - {RC_REG_CLASS_ZW, 1, 0, 0, - {RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XYW, 1, 0, 0, - {RC_MASK_X | RC_MASK_Y | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_YZW, 1, 0, 0, - {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XZW, 1, 0, 0, - {RC_MASK_X | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}} - }; - - unsigned int i, j, index, input_node, node_count, node_index; - unsigned int * node_classes; - unsigned int * input_classes; - struct rc_instruction * inst; - struct rc_list * var_ptr; - struct rc_list * variables; - struct ra_regs * regs; - struct ra_graph * graph; - - /* Allocate the main ra data structure */ - regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW); - - /* Get list of program variables */ - variables = rc_get_variables(s->C); - node_count = rc_list_count(variables); - node_classes = memory_pool_malloc(&s->C->Pool, - node_count * sizeof(unsigned int)); - input_classes = memory_pool_malloc(&s->C->Pool, - s->NumInputs * sizeof(unsigned int)); - - for (var_ptr = variables, node_index = 0; var_ptr; - var_ptr = var_ptr->Next, node_index++) { - unsigned int class_index; - /* Compute the live intervals */ - rc_variable_compute_live_intervals(var_ptr->Item); - - class_index = variable_get_class(var_ptr->Item, rc_class_list); - - /* If we haven't used this register class yet, mark it - * as used and allocate space for it. 
*/ - if (!rc_class_list[class_index].Used) { - rc_class_list[class_index].Used = 1; - rc_class_list[class_index].Id = ra_alloc_reg_class(regs); - } - - node_classes[node_index] = rc_class_list[class_index].Id; - } - - - /* Assign registers to the classes */ - for (i = 0; i < RC_REG_CLASS_COUNT; i++) { - struct rc_class class = rc_class_list[i]; - if (!class.Used) { - continue; - } - - for (index = 0; index < s->C->max_temp_regs; index++) { - for (j = 0; j < class.WritemaskCount; j++) { - int reg_id = get_reg_id(index, - class.Writemasks[j]); - ra_class_add_reg(regs, class.Id, reg_id); - } - } - } - - /* Add register conflicts */ - add_register_conflicts(regs, s->C->max_temp_regs); - - /* Calculate live intervals for input registers */ - for (inst = s->C->Program.Instructions.Next; - inst != &s->C->Program.Instructions; - inst = inst->Next) { - rc_opcode op = rc_get_flow_control_inst(inst); - if (op == RC_OPCODE_BGNLOOP) { - struct rc_instruction * endloop = - rc_match_bgnloop(inst); - if (endloop->IP > s->LoopEnd) { - s->LoopEnd = endloop->IP; - } - } - rc_for_all_reads_mask(inst, scan_read_callback, s); - } - - /* Create classes for input registers */ - for (i = 0; i < s->NumInputs; i++) { - unsigned int chan, class_id, writemask = 0; - for (chan = 0; chan < 4; chan++) { - if (s->Input[i].Live[chan].Used) { - writemask |= (1 << chan); - } - } - s->Input[i].Writemask = writemask; - if (!writemask) { - continue; - } - - class_id = ra_alloc_reg_class(regs); - input_classes[i] = class_id; - ra_class_add_reg(regs, class_id, - get_reg_id(s->Input[i].Index, writemask)); - } - - ra_set_finalize(regs); - - graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); - - /* Build the interference graph */ - for (var_ptr = variables, node_index = 0; var_ptr; - var_ptr = var_ptr->Next,node_index++) { - struct rc_list * a, * b; - unsigned int b_index; - - ra_set_node_class(graph, node_index, node_classes[node_index]); - - for (a = var_ptr, b = var_ptr->Next, b_index = 
node_index + 1; - b; b = b->Next, b_index++) { - struct rc_variable * var_a = a->Item; - while (var_a) { - struct rc_variable * var_b = b->Item; - while (var_b) { - if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { - ra_add_node_interference(graph, - node_index, b_index); - } - var_b = var_b->Friend; - } - var_a = var_a->Friend; - } - } - } - - /* Add input registers to the interference graph */ - for (i = 0, input_node = 0; i< s->NumInputs; i++) { - if (!s->Input[i].Writemask) { - continue; - } - ra_set_node_class(graph, node_count + input_node, - input_classes[i]); - for (var_ptr = variables, node_index = 0; - var_ptr; var_ptr = var_ptr->Next, node_index++) { - struct rc_variable * var = var_ptr->Item; - if (overlap_live_intervals_array(s->Input[i].Live, - var->Live)) { - ra_add_node_interference(graph, node_index, - node_count + input_node); - } - } - /* Manually allocate a register for this input */ - ra_set_node_reg(graph, node_count + input_node, get_reg_id( - s->Input[i].Index, s->Input[i].Writemask)); - input_node++; - } - - if (!ra_allocate_no_spills(graph)) { - rc_error(s->C, "Ran out of hardware temporaries\n"); - return; - } - - /* Rewrite the registers */ - for (var_ptr = variables, node_index = 0; var_ptr; - var_ptr = var_ptr->Next, node_index++) { - int reg = ra_get_node_reg(graph, node_index); - unsigned int writemask = reg_get_writemask(reg); - unsigned int index = reg_get_index(reg); - struct rc_variable * var = var_ptr->Item; - - if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { - writemask = rc_variable_writemask_sum(var); - } - - if (var->Dst.File == RC_FILE_INPUT) { - continue; - } - rc_variable_change_dst(var, index, writemask); - } - - ralloc_free(graph); - ralloc_free(regs); -} - -/** - * @param user This parameter should be a pointer to an integer value. 
If this - * integer value is zero, then a simple register allocator will be used that - * only allocates space for input registers (\sa do_regalloc_inputs_only). If - * user is non-zero, then the regular register allocator will be used - * (\sa do_regalloc). - */ -void rc_pair_regalloc(struct radeon_compiler *cc, void *user) -{ - struct r300_fragment_program_compiler *c = - (struct r300_fragment_program_compiler*)cc; - struct regalloc_state s; - int * do_full_regalloc = (int*)user; - - memset(&s, 0, sizeof(s)); - s.C = cc; - s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; - s.Input = memory_pool_malloc(&cc->Pool, - s.NumInputs * sizeof(struct register_info)); - memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); - - s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; - s.Temporary = memory_pool_malloc(&cc->Pool, - s.NumTemporaries * sizeof(struct register_info)); - memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); - - rc_recompute_ips(s.C); - - c->AllocateHwInputs(c, &alloc_input_simple, &s); - if (*do_full_regalloc) { - do_advanced_regalloc(&s); - } else { - s.Simple = 1; - do_regalloc_inputs_only(&s); - } - - /* Rewrite inputs and if we are doing the simple allocation, rewrite - * temporaries too. */ - for (struct rc_instruction *inst = s.C->Program.Instructions.Next; - inst != &s.C->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, &remap_register, &s); - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c deleted file mode 100644 index 25cd52c..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ /dev/null @@ -1,1010 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program_pair.h" - -#include - -#include "radeon_compiler.h" -#include "radeon_compiler_util.h" -#include "radeon_dataflow.h" - - -#define VERBOSE 0 - -#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) - -struct schedule_instruction { - struct rc_instruction * Instruction; - - /** Next instruction in the linked list of ready instructions. */ - struct schedule_instruction *NextReady; - - /** Values that this instruction reads and writes */ - struct reg_value * WriteValues[4]; - struct reg_value * ReadValues[12]; - unsigned int NumWriteValues:3; - unsigned int NumReadValues:4; - - /** - * Number of (read and write) dependencies that must be resolved before - * this instruction can be scheduled. 
- */ - unsigned int NumDependencies:5; - - /** List of all readers (see rc_get_readers() for the definition of - * "all readers"), even those outside the basic block this instruction - * lives in. */ - struct rc_reader_data GlobalReaders; -}; - - -/** - * Used to keep track of which instructions read a value. - */ -struct reg_value_reader { - struct schedule_instruction *Reader; - struct reg_value_reader *Next; -}; - -/** - * Used to keep track which values are stored in each component of a - * RC_FILE_TEMPORARY. - */ -struct reg_value { - struct schedule_instruction * Writer; - - /** - * Unordered linked list of instructions that read from this value. - * When this value becomes available, we increase all readers' - * dependency count. - */ - struct reg_value_reader *Readers; - - /** - * Number of readers of this value. This is decremented each time - * a reader of the value is committed. - * When the reader cound reaches zero, the dependency count - * of the instruction writing \ref Next is decremented. - */ - unsigned int NumReaders; - - struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ -}; - -struct register_state { - struct reg_value * Values[4]; -}; - -struct remap_reg { - struct rc_instruciont * Inst; - unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); - unsigned int OldSwizzle:3; - unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); - unsigned int NewSwizzle:3; - unsigned int OnlyTexReads:1; - struct remap_reg * Next; -}; - -struct schedule_state { - struct radeon_compiler * C; - struct schedule_instruction * Current; - - struct register_state Temporary[RC_REGISTER_MAX_INDEX]; - - /** - * Linked lists of instructions that can be scheduled right now, - * based on which ALU/TEX resources they require. 
- */ - /*@{*/ - struct schedule_instruction *ReadyFullALU; - struct schedule_instruction *ReadyRGB; - struct schedule_instruction *ReadyAlpha; - struct schedule_instruction *ReadyTEX; - /*@}*/ -}; - -static struct reg_value ** get_reg_valuep(struct schedule_state * s, - rc_register_file file, unsigned int index, unsigned int chan) -{ - if (file != RC_FILE_TEMPORARY) - return 0; - - if (index >= RC_REGISTER_MAX_INDEX) { - rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); - return 0; - } - - return &s->Temporary[index].Values[chan]; -} - -static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) -{ - inst->NextReady = *list; - *list = inst; -} - -static void add_inst_to_list_end(struct schedule_instruction ** list, - struct schedule_instruction * inst) -{ - if(!*list){ - *list = inst; - }else{ - struct schedule_instruction * temp = *list; - while(temp->NextReady){ - temp = temp->NextReady; - } - temp->NextReady = inst; - } -} - -static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) -{ - DBG("%i is now ready\n", sinst->Instruction->IP); - - /* Adding Ready TEX instructions to the end of the "Ready List" helps - * us emit TEX instructions in blocks without losing our place. 
*/ - if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) - add_inst_to_list_end(&s->ReadyTEX, sinst); - else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) - add_inst_to_list(&s->ReadyRGB, sinst); - else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) - add_inst_to_list(&s->ReadyAlpha, sinst); - else - add_inst_to_list(&s->ReadyFullALU, sinst); -} - -static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) -{ - assert(sinst->NumDependencies > 0); - sinst->NumDependencies--; - if (!sinst->NumDependencies) - instruction_ready(s, sinst); -} - -/** - * This function decreases the dependencies of the next instruction that - * wants to write to each of sinst's read values. - */ -static void commit_update_reads(struct schedule_state * s, - struct schedule_instruction * sinst){ - unsigned int i; - for(i = 0; i < sinst->NumReadValues; ++i) { - struct reg_value * v = sinst->ReadValues[i]; - assert(v->NumReaders > 0); - v->NumReaders--; - if (!v->NumReaders) { - if (v->Next) - decrease_dependencies(s, v->Next->Writer); - } - } -} - -static void commit_update_writes(struct schedule_state * s, - struct schedule_instruction * sinst){ - unsigned int i; - for(i = 0; i < sinst->NumWriteValues; ++i) { - struct reg_value * v = sinst->WriteValues[i]; - if (v->NumReaders) { - for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { - decrease_dependencies(s, r->Reader); - } - } else { - /* This happens in instruction sequences of the type - * OP r.x, ...; - * OP r.x, r.x, ...; - * See also the subtlety in how instructions that both - * read and write the same register are scanned. 
- */ - if (v->Next) - decrease_dependencies(s, v->Next->Writer); - } - } -} - -static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) -{ - DBG("%i: commit\n", sinst->Instruction->IP); - - commit_update_reads(s, sinst); - - commit_update_writes(s, sinst); -} - -/** - * Emit all ready texture instructions in a single block. - * - * Emit as a single block to (hopefully) sample many textures in parallel, - * and to avoid hardware indirections on R300. - */ -static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) -{ - struct schedule_instruction *readytex; - struct rc_instruction * inst_begin; - - assert(s->ReadyTEX); - - /* Node marker for R300 */ - inst_begin = rc_insert_new_instruction(s->C, before->Prev); - inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; - - /* Link texture instructions back in */ - readytex = s->ReadyTEX; - while(readytex) { - rc_insert_instruction(before->Prev, readytex->Instruction); - DBG("%i: commit TEX reads\n", readytex->Instruction->IP); - - /* All of the TEX instructions in the same TEX block have - * their source registers read from before any of the - * instructions in that block write to their destination - * registers. This means that when we commit a TEX - * instruction, any other TEX instruction that wants to write - * to one of the committed instruction's source register can be - * marked as ready and should be emitted in the same TEX - * block. 
This prevents the following sequence from being - * emitted in two different TEX blocks: - * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; - * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; - */ - commit_update_reads(s, readytex); - readytex = readytex->NextReady; - } - readytex = s->ReadyTEX; - s->ReadyTEX = 0; - while(readytex){ - DBG("%i: commit TEX writes\n", readytex->Instruction->IP); - commit_update_writes(s, readytex); - readytex = readytex->NextReady; - } -} - -/* This is a helper function for destructive_merge_instructions(). It helps - * merge presubtract sources from two instructions and makes sure the - * presubtract sources end up in the correct spot. This function assumes that - * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb) - * but no scalar instruction (alpha). - * @return 0 if merging the presubtract sources fails. - * @return 1 if merging the presubtract sources succeeds. - */ -static int merge_presub_sources( - struct rc_pair_instruction * dst_full, - struct rc_pair_sub_instruction src, - unsigned int type) -{ - unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; - struct rc_pair_sub_instruction * dst_sub; - const struct rc_opcode_info * info; - - assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); - - switch(type) { - case RC_SOURCE_RGB: - is_rgb = 1; - is_alpha = 0; - dst_sub = &dst_full->RGB; - break; - case RC_SOURCE_ALPHA: - is_rgb = 0; - is_alpha = 1; - dst_sub = &dst_full->Alpha; - break; - default: - assert(0); - return 0; - } - - info = rc_get_opcode_info(dst_full->RGB.Opcode); - - if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) - return 0; - - srcp_regs = rc_presubtract_src_reg_count( - src.Src[RC_PAIR_PRESUB_SRC].Index); - for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { - unsigned int arg; - int free_source; - unsigned int one_way = 0; - struct rc_pair_instruction_source srcp = src.Src[srcp_src]; - struct rc_pair_instruction_source temp; - - free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, - 
srcp.File, srcp.Index); - - /* If free_source < 0 then there are no free source - * slots. */ - if (free_source < 0) - return 0; - - temp = dst_sub->Src[srcp_src]; - dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; - - /* srcp needs src0 and src1 to be the same */ - if (free_source < srcp_src) { - if (!temp.Used) - continue; - free_source = rc_pair_alloc_source(dst_full, is_rgb, - is_alpha, temp.File, temp.Index); - if (free_source < 0) - return 0; - one_way = 1; - } else { - dst_sub->Src[free_source] = temp; - } - - /* If free_source == srcp_src, then the presubtract - * source is already in the correct place. */ - if (free_source == srcp_src) - continue; - - /* Shuffle the sources, so we can put the - * presubtract source in the correct place. */ - for(arg = 0; arg < info->NumSrcRegs; arg++) { - /*If this arg does not read from an rgb source, - * do nothing. */ - if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) - & type)) { - continue; - } - - if (dst_full->RGB.Arg[arg].Source == srcp_src) - dst_full->RGB.Arg[arg].Source = free_source; - /* We need to do this just in case register - * is one of the sources already, but in the - * wrong spot. */ - else if(dst_full->RGB.Arg[arg].Source == free_source - && !one_way) { - dst_full->RGB.Arg[arg].Source = srcp_src; - } - } - } - return 1; -} - - -/* This function assumes that rgb.Alpha and alpha.RGB are unused */ -static int destructive_merge_instructions( - struct rc_pair_instruction * rgb, - struct rc_pair_instruction * alpha) -{ - const struct rc_opcode_info * opcode; - - assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); - assert(alpha->RGB.Opcode == RC_OPCODE_NOP); - - /* Presubtract registers need to be merged first so that registers - * needed by the presubtract operation can be placed in src0 and/or - * src1. */ - - /* Merge the rgb presubtract registers. 
*/ - if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { - return 0; - } - } - /* Merge the alpha presubtract registers */ - if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ - return 0; - } - } - - /* Copy alpha args into rgb */ - opcode = rc_get_opcode_info(alpha->Alpha.Opcode); - - for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { - unsigned int srcrgb = 0; - unsigned int srcalpha = 0; - unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; - rc_register_file file = 0; - unsigned int index = 0; - int source; - - if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { - srcrgb = 1; - file = alpha->RGB.Src[oldsrc].File; - index = alpha->RGB.Src[oldsrc].Index; - } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { - srcalpha = 1; - file = alpha->Alpha.Src[oldsrc].File; - index = alpha->Alpha.Src[oldsrc].Index; - } - - source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); - if (source < 0) - return 0; - - rgb->Alpha.Arg[arg].Source = source; - rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; - rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; - rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; - } - - /* Copy alpha opcode into rgb */ - rgb->Alpha.Opcode = alpha->Alpha.Opcode; - rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; - rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; - rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; - rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; - rgb->Alpha.Saturate = alpha->Alpha.Saturate; - - /* Merge ALU result writing */ - if (alpha->WriteALUResult) { - if (rgb->WriteALUResult) - return 0; - - rgb->WriteALUResult = alpha->WriteALUResult; - rgb->ALUResultCompare = alpha->ALUResultCompare; - } - - return 1; -} - -/** - * Try to merge the given instructions into the rgb instructions. 
- * - * Return true on success; on failure, return false, and keep - * the instructions untouched. - */ -static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) -{ - struct rc_pair_instruction backup; - - /*Instructions can't write output registers and ALU result at the - * same time. */ - if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) - || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { - return 0; - } - memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); - - if (destructive_merge_instructions(rgb, alpha)) - return 1; - - memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); - return 0; -} - -static void presub_nop(struct rc_instruction * emitted) { - int prev_rgb_index, prev_alpha_index, i, num_src; - - /* We don't need a nop if the previous instruction is a TEX. */ - if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { - return; - } - /* -1 means that half of the previous instruction wrote no register, - * so it cannot match any presubtract source index below. */ - if (emitted->Prev->U.P.RGB.WriteMask) - prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; - else - prev_rgb_index = -1; - if (emitted->Prev->U.P.Alpha.WriteMask) - prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; - else - prev_alpha_index = -1; - - /* Check the previous rgb instruction */ - if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - num_src = rc_presubtract_src_reg_count( - emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); - for (i = 0; i < num_src; i++) { - unsigned int index = emitted->U.P.RGB.Src[i].Index; - if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY - && (index == prev_rgb_index - || index == prev_alpha_index)) { - emitted->Prev->U.P.Nop = 1; - return; - } - } - } - - /* Check the previous alpha instruction. 
*/ - if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) - return; - - num_src = rc_presubtract_src_reg_count( - emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); - for (i = 0; i < num_src; i++) { - unsigned int index = emitted->U.P.Alpha.Src[i].Index; - if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY - && (index == prev_rgb_index || index == prev_alpha_index)) { - emitted->Prev->U.P.Nop = 1; - return; - } - } -} - -static void rgb_to_alpha_remap ( - struct rc_instruction * inst, - struct rc_pair_instruction_arg * arg, - rc_register_file old_file, - rc_swizzle old_swz, - unsigned int new_index) -{ - int new_src_index; - unsigned int i; - - for (i = 0; i < 3; i++) { - if (get_swz(arg->Swizzle, i) == old_swz) { - SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); - } - } - new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, - old_file, new_index); - /* This conversion is not possible, we must have made a mistake in - * is_rgb_to_alpha_possible. */ - if (new_src_index < 0) { - assert(0); - return; - } - - arg->Source = new_src_index; -} - -static int can_remap(unsigned int opcode) -{ - switch(opcode) { - case RC_OPCODE_DDX: - case RC_OPCODE_DDY: - return 0; - default: - return 1; - } -} - -static int can_convert_opcode_to_alpha(unsigned int opcode) -{ - switch(opcode) { - case RC_OPCODE_DDX: - case RC_OPCODE_DDY: - case RC_OPCODE_DP2: - case RC_OPCODE_DP3: - case RC_OPCODE_DP4: - case RC_OPCODE_DPH: - return 0; - default: - return 1; - } -} - -static void is_rgb_to_alpha_possible( - void * userdata, - struct rc_instruction * inst, - struct rc_pair_instruction_arg * arg, - struct rc_pair_instruction_source * src) -{ - unsigned int chan_count = 0; - unsigned int alpha_sources = 0; - unsigned int i; - struct rc_reader_data * reader_data = userdata; - - if (!can_remap(inst->U.P.RGB.Opcode) - || !can_remap(inst->U.P.Alpha.Opcode)) { - reader_data->Abort = 1; - return; - } - - if (!src) - return; - - /* XXX There are some cases where we can still do the conversion if - * 
a reader reads from a presubtract source, but for now we'll prevent - * it. */ - if (arg->Source == RC_PAIR_PRESUB_SRC) { - reader_data->Abort = 1; - return; - } - - /* Make sure the source only reads from one component. - * XXX We should allow the source to read from the same component twice. - * XXX If the index we will be converting to is the same as the - * current index, then it is OK to read from more than one component. - */ - for (i = 0; i < 3; i++) { - rc_swizzle swz = get_swz(arg->Swizzle, i); - switch(swz) { - case RC_SWIZZLE_X: - case RC_SWIZZLE_Y: - case RC_SWIZZLE_Z: - case RC_SWIZZLE_W: - chan_count++; - break; - default: - break; - } - } - if (chan_count > 1) { - reader_data->Abort = 1; - return; - } - - /* Make sure there are enough alpha sources. - * XXX If we know what register all the readers are going - * to be remapped to, then in some situations we can still do - * the substitution, even if all 3 alpha sources are being used.*/ - for (i = 0; i < 3; i++) { - if (inst->U.P.Alpha.Src[i].Used) { - alpha_sources++; - } - } - if (alpha_sources > 2) { - reader_data->Abort = 1; - return; - } -} - -static int convert_rgb_to_alpha( - struct schedule_state * s, - struct schedule_instruction * sched_inst) -{ - struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; - unsigned int old_mask = pair_inst->RGB.WriteMask; - unsigned int old_swz = rc_mask_to_swizzle(old_mask); - const struct rc_opcode_info * info = - rc_get_opcode_info(pair_inst->RGB.Opcode); - int new_index = -1; - unsigned int i; - - if (sched_inst->GlobalReaders.Abort) - return 0; - - if (!pair_inst->RGB.WriteMask) - return 0; - - if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) - || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { - return 0; - } - - assert(sched_inst->NumWriteValues == 1); - - if (!sched_inst->WriteValues[0]) { - assert(0); - return 0; - } - - /* We start at the old index, because if we can reuse the same - * register and just change the 
swizzle then it is more likely we - * will be able to convert all the readers. */ - for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { - struct reg_value ** new_regvalp = get_reg_valuep( - s, RC_FILE_TEMPORARY, i, 3); - if (!*new_regvalp) { - struct reg_value ** old_regvalp = - get_reg_valuep(s, - RC_FILE_TEMPORARY, - pair_inst->RGB.DestIndex, - rc_mask_to_swizzle(old_mask)); - new_index = i; - *new_regvalp = *old_regvalp; - *old_regvalp = NULL; - new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); - break; - } - } - if (new_index < 0) { - return 0; - } - - pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; - pair_inst->Alpha.DestIndex = new_index; - pair_inst->Alpha.WriteMask = RC_MASK_W; - pair_inst->Alpha.Target = pair_inst->RGB.Target; - pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; - pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; - pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; - memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, - sizeof(pair_inst->Alpha.Arg)); - /* Move the swizzles into the first chan */ - for (i = 0; i < info->NumSrcRegs; i++) { - unsigned int j; - for (j = 0; j < 3; j++) { - unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); - if (swz != RC_SWIZZLE_UNUSED) { - pair_inst->Alpha.Arg[i].Swizzle = - rc_init_swizzle(swz, 1); - break; - } - } - } - pair_inst->RGB.Opcode = RC_OPCODE_NOP; - pair_inst->RGB.DestIndex = 0; - pair_inst->RGB.WriteMask = 0; - pair_inst->RGB.Target = 0; - pair_inst->RGB.OutputWriteMask = 0; - pair_inst->RGB.DepthWriteMask = 0; - pair_inst->RGB.Saturate = 0; - memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); - - for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { - struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; - rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg, - RC_FILE_TEMPORARY, old_swz, new_index); - } - return 1; -} - -/** - * Find a good ALU instruction or pair of ALU instruction and emit it. 
- * - * Prefer emitting full ALU instructions, so that when we reach a point - * where no full ALU instruction can be emitted, we have more candidates - * for RGB/Alpha pairing. - */ -static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) -{ - struct schedule_instruction * sinst; - - if (s->ReadyFullALU) { - sinst = s->ReadyFullALU; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - rc_insert_instruction(before->Prev, sinst->Instruction); - commit_alu_instruction(s, sinst); - } else { - struct schedule_instruction **prgb; - struct schedule_instruction **palpha; - struct schedule_instruction *prev; -pair: - /* Some pairings might fail because they require too - * many source slots; try all possible pairings if necessary */ - for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { - for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - struct schedule_instruction * psirgb = *prgb; - struct schedule_instruction * psialpha = *palpha; - - if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) - continue; - - *prgb = (*prgb)->NextReady; - *palpha = (*palpha)->NextReady; - rc_insert_instruction(before->Prev, psirgb->Instruction); - commit_alu_instruction(s, psirgb); - commit_alu_instruction(s, psialpha); - goto success; - } - } - prev = NULL; - /* No success in pairing, now try to convert one of the RGB - * instructions to an Alpha so we can pair it with another RGB. - */ - if (s->ReadyRGB && s->ReadyRGB->NextReady) { - for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { - if ((*prgb)->NumWriteValues == 1) { - struct schedule_instruction * prgb_next; - if (!convert_rgb_to_alpha(s, *prgb)) - goto cont_loop; - prgb_next = (*prgb)->NextReady; - /* Add instruction to the Alpha ready list. 
*/ - (*prgb)->NextReady = s->ReadyAlpha; - s->ReadyAlpha = *prgb; - /* Remove instruction from the RGB ready list.*/ - if (prev) - prev->NextReady = prgb_next; - else - s->ReadyRGB = prgb_next; - goto pair; - } -cont_loop: - prev = *prgb; - } - } - /* Still no success in pairing, just take the first RGB - * or alpha instruction. */ - if (s->ReadyRGB) { - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else if (s->ReadyAlpha) { - sinst = s->ReadyAlpha; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } else { - /*XXX Something real bad has happened. */ - assert(0); - } - - rc_insert_instruction(before->Prev, sinst->Instruction); - commit_alu_instruction(s, sinst); - success: ; - } - /* If the instruction we just emitted uses a presubtract value, and - * the presubtract sources were written by the previous instruction, - * the previous instruction needs a nop. */ - presub_nop(before->Prev); -} - -static void scan_read(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan) -{ - struct schedule_state * s = data; - struct reg_value ** v = get_reg_valuep(s, file, index, chan); - struct reg_value_reader * reader; - - if (!v) - return; - - if (*v && (*v)->Writer == s->Current) { - /* The instruction reads and writes to a register component. - * In this case, we only want to increment dependencies by one. - */ - return; - } - - DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); - - reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); - reader->Reader = s->Current; - /* Terminate the list explicitly: other pool allocations in this file - * are memset by hand, so the memory is presumably not zeroed. */ - reader->Next = NULL; - if (!*v) { - /* In this situation, the instruction reads from a register - * that hasn't been written to or read from in the current - * block. 
*/ - *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); - memset(*v, 0, sizeof(struct reg_value)); - (*v)->Readers = reader; - } else { - reader->Next = (*v)->Readers; - (*v)->Readers = reader; - /* Only update the current instruction's dependencies if the - * register it reads from has been written to in this block. */ - if ((*v)->Writer) { - s->Current->NumDependencies++; - } - } - (*v)->NumReaders++; - - if (s->Current->NumReadValues >= 12) { - rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); - } else { - s->Current->ReadValues[s->Current->NumReadValues++] = *v; - } -} - -static void scan_write(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan) -{ - struct schedule_state * s = data; - struct reg_value ** pv = get_reg_valuep(s, file, index, chan); - struct reg_value * newv; - - if (!pv) - return; - - DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); - - newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); - memset(newv, 0, sizeof(*newv)); - - newv->Writer = s->Current; - - if (*pv) { - (*pv)->Next = newv; - s->Current->NumDependencies++; - } - - *pv = newv; - - if (s->Current->NumWriteValues >= 4) { - rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); - } else { - s->Current->WriteValues[s->Current->NumWriteValues++] = newv; - } -} - -static void is_rgb_to_alpha_possible_normal( - void * userdata, - struct rc_instruction * inst, - struct rc_src_register * src) -{ - struct rc_reader_data * reader_data = userdata; - reader_data->Abort = 1; - -} - -static void schedule_block(struct r300_fragment_program_compiler * c, - struct rc_instruction * begin, struct rc_instruction * end) -{ - struct schedule_state s; - unsigned int ip; - - memset(&s, 0, sizeof(s)); - s.C = &c->Base; - - /* Scan instructions for data dependencies */ - ip = 0; - for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { - s.Current = 
memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); - memset(s.Current, 0, sizeof(struct schedule_instruction)); - - s.Current->Instruction = inst; - inst->IP = ip++; - - DBG("%i: Scanning\n", inst->IP); - - /* The order of things here is subtle and maybe slightly - * counter-intuitive, to account for the case where an - * instruction writes to the same register as it reads - * from. */ - rc_for_all_writes_chan(inst, &scan_write, &s); - rc_for_all_reads_chan(inst, &scan_read, &s); - - DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); - - if (!s.Current->NumDependencies) - instruction_ready(&s, s.Current); - - /* Get global readers for possible RGB->Alpha conversion. */ - s.Current->GlobalReaders.ExitOnAbort = 1; - rc_get_readers(s.C, inst, &s.Current->GlobalReaders, - is_rgb_to_alpha_possible_normal, - is_rgb_to_alpha_possible, NULL); - } - - /* Temporarily unlink all instructions */ - begin->Prev->Next = end; - end->Prev = begin->Prev; - - /* Schedule instructions back */ - while(!s.C->Error && - (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { - if (s.ReadyTEX) - emit_all_tex(&s, end); - - while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) - emit_one_alu(&s, end); - } -} - -static int is_controlflow(struct rc_instruction * inst) -{ - if (inst->Type == RC_INSTRUCTION_NORMAL) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - return opcode->IsFlowControl; - } - return 0; -} - -void rc_pair_schedule(struct radeon_compiler *cc, void *user) -{ - struct schedule_state s; - - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct rc_instruction * inst = c->Base.Program.Instructions.Next; - - memset(&s, 0, sizeof(s)); - s.C = &c->Base; - while(inst != &c->Base.Program.Instructions) { - struct rc_instruction * first; - - if (is_controlflow(inst)) { - inst = inst->Next; - continue; - } - - first = inst; - - while(inst != 
&c->Base.Program.Instructions && !is_controlflow(inst)) - inst = inst->Next; - - DBG("Schedule one block\n"); - schedule_block(c, first, inst); - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c deleted file mode 100644 index 2dae56a..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program_pair.h" - -#include "radeon_compiler.h" -#include "radeon_compiler_util.h" - - -/** - * Finally rewrite ADD, MOV, MUL as the appropriate native instruction - * and reverse the order of arguments for CMP. 
- */ -static void final_rewrite(struct rc_sub_instruction *inst) -{ - struct rc_src_register tmp; - - switch(inst->Opcode) { - case RC_OPCODE_ADD: - inst->SrcReg[2] = inst->SrcReg[1]; - inst->SrcReg[1].File = RC_FILE_NONE; - inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; - inst->SrcReg[1].Negate = RC_MASK_NONE; - inst->Opcode = RC_OPCODE_MAD; - break; - case RC_OPCODE_CMP: - tmp = inst->SrcReg[2]; - inst->SrcReg[2] = inst->SrcReg[0]; - inst->SrcReg[0] = tmp; - break; - case RC_OPCODE_MOV: - /* AMD say we should use CMP. - * However, when we transform - * KIL -r0; - * into - * CMP tmp, -r0, -r0, 0; - * KIL tmp; - * we get incorrect behaviour on R500 when r0 == 0.0. - * It appears that the R500 KIL hardware treats -0.0 as less - * than zero. - */ - inst->SrcReg[1].File = RC_FILE_NONE; - inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; - inst->SrcReg[2].File = RC_FILE_NONE; - inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; - inst->Opcode = RC_OPCODE_MAD; - break; - case RC_OPCODE_MUL: - inst->SrcReg[2].File = RC_FILE_NONE; - inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; - inst->Opcode = RC_OPCODE_MAD; - break; - default: - /* nothing to do */ - break; - } -} - - -/** - * Classify an instruction according to which ALUs etc. it needs - */ -static void classify_instruction(struct rc_sub_instruction * inst, - int * needrgb, int * needalpha, int * istranscendent) -{ - *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; - *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 
1 : 0; - *istranscendent = 0; - - if (inst->WriteALUResult == RC_ALURESULT_X) - *needrgb = 1; - else if (inst->WriteALUResult == RC_ALURESULT_W) - *needalpha = 1; - - switch(inst->Opcode) { - case RC_OPCODE_ADD: - case RC_OPCODE_CMP: - case RC_OPCODE_CND: - case RC_OPCODE_DDX: - case RC_OPCODE_DDY: - case RC_OPCODE_FRC: - case RC_OPCODE_MAD: - case RC_OPCODE_MAX: - case RC_OPCODE_MIN: - case RC_OPCODE_MOV: - case RC_OPCODE_MUL: - break; - case RC_OPCODE_COS: - case RC_OPCODE_EX2: - case RC_OPCODE_LG2: - case RC_OPCODE_RCP: - case RC_OPCODE_RSQ: - case RC_OPCODE_SIN: - *istranscendent = 1; - *needalpha = 1; - break; - case RC_OPCODE_DP4: - *needalpha = 1; - /* fall through */ - case RC_OPCODE_DP3: - *needrgb = 1; - break; - default: - break; - } -} - -static void src_uses(struct rc_src_register src, unsigned int * rgb, - unsigned int * alpha) -{ - int j; - for(j = 0; j < 4; ++j) { - unsigned int swz = GET_SWZ(src.Swizzle, j); - if (swz < 3) - *rgb = 1; - else if (swz < 4) - *alpha = 1; - } -} - -/** - * Fill the given ALU instruction's opcodes and source operands into the given pair, - * if possible. 
- */ -static void set_pair_instruction(struct r300_fragment_program_compiler *c, - struct rc_pair_instruction * pair, - struct rc_sub_instruction * inst) -{ - int needrgb, needalpha, istranscendent; - const struct rc_opcode_info * opcode; - int i; - - memset(pair, 0, sizeof(struct rc_pair_instruction)); - - classify_instruction(inst, &needrgb, &needalpha, &istranscendent); - - if (needrgb) { - if (istranscendent) - pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; - else - pair->RGB.Opcode = inst->Opcode; - if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) - pair->RGB.Saturate = 1; - } - if (needalpha) { - pair->Alpha.Opcode = inst->Opcode; - if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) - pair->Alpha.Saturate = 1; - } - - opcode = rc_get_opcode_info(inst->Opcode); - - /* Presubtract handling: - * We need to make sure that the values used by the presubtract - * operation end up in src0 or src1. */ - if(inst->PreSub.Opcode != RC_PRESUB_NONE) { - /* rc_pair_alloc_source() will fill in data for - * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ - int j; - for(j = 0; j < 3; j++) { - int src_regs; - if(inst->SrcReg[j].File != RC_FILE_PRESUB) - continue; - - src_regs = rc_presubtract_src_reg_count( - inst->PreSub.Opcode); - for(i = 0; i < src_regs; i++) { - unsigned int rgb = 0; - unsigned int alpha = 0; - src_uses(inst->SrcReg[j], &rgb, &alpha); - if(rgb) { - pair->RGB.Src[i].File = - inst->PreSub.SrcReg[i].File; - pair->RGB.Src[i].Index = - inst->PreSub.SrcReg[i].Index; - pair->RGB.Src[i].Used = 1; - } - if(alpha) { - pair->Alpha.Src[i].File = - inst->PreSub.SrcReg[i].File; - pair->Alpha.Src[i].Index = - inst->PreSub.SrcReg[i].Index; - pair->Alpha.Src[i].Used = 1; - } - } - } - } - - for(i = 0; i < opcode->NumSrcRegs; ++i) { - int source; - if (needrgb && !istranscendent) { - unsigned int srcrgb = 0; - unsigned int srcalpha = 0; - unsigned int srcmask = 0; - int j; - /* We don't care about the alpha channel here. 
We only - * want the part of the swizzle that writes to rgb, - * since we are creating an rgb instruction. */ - for(j = 0; j < 3; ++j) { - unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - - if (swz < RC_SWIZZLE_W) - srcrgb = 1; - else if (swz == RC_SWIZZLE_W) - srcalpha = 1; - - if (swz < RC_SWIZZLE_UNUSED) - srcmask |= 1 << j; - } - source = rc_pair_alloc_source(pair, srcrgb, srcalpha, - inst->SrcReg[i].File, inst->SrcReg[i].Index); - if (source < 0) { - rc_error(&c->Base, "Failed to translate " - "rgb instruction.\n"); - return; - } - pair->RGB.Arg[i].Source = source; - pair->RGB.Arg[i].Swizzle = - rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); - pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); - } - if (needalpha) { - unsigned int srcrgb = 0; - unsigned int srcalpha = 0; - unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3); - if (swz < 3) - srcrgb = 1; - else if (swz < 4) - srcalpha = 1; - source = rc_pair_alloc_source(pair, srcrgb, srcalpha, - inst->SrcReg[i].File, inst->SrcReg[i].Index); - if (source < 0) { - rc_error(&c->Base, "Failed to translate " - "alpha instruction.\n"); - return; - } - pair->Alpha.Arg[i].Source = source; - pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); - pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); - } - } - - /* Destination handling */ - if (inst->DstReg.File == RC_FILE_OUTPUT) { - if (inst->DstReg.Index == c->OutputDepth) { - pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else { - for (i = 0; i < 4; i++) { - if (inst->DstReg.Index == c->OutputColor[i]) { - pair->RGB.Target = i; - pair->Alpha.Target = i; - pair->RGB.OutputWriteMask |= - inst->DstReg.WriteMask & RC_MASK_XYZ; - pair->Alpha.OutputWriteMask |= - GET_BIT(inst->DstReg.WriteMask, 3); - break; - } - } - } - } else { - if (needrgb) { - 
pair->RGB.DestIndex = inst->DstReg.Index; - pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; - } - - if (needalpha) { - pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); - if (pair->Alpha.WriteMask) { - pair->Alpha.DestIndex = inst->DstReg.Index; - } - } - } - - if (inst->WriteALUResult) { - pair->WriteALUResult = inst->WriteALUResult; - pair->ALUResultCompare = inst->ALUResultCompare; - } -} - - -static void check_opcode_support(struct r300_fragment_program_compiler *c, - struct rc_sub_instruction *inst) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - - if (opcode->HasDstReg) { - if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { - rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); - return; - } - } - - for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->SrcReg[i].RelAddr) { - rc_error(&c->Base, "Fragment program does not support relative addressing " - " of source operands.\n"); - return; - } - } -} - - -/** - * Translate all ALU instructions into corresponding pair instructions, - * performing no other changes. 
- */ -void rc_pair_translate(struct radeon_compiler *cc, void *user) -{ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - - for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; - inst != &c->Base.Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info * opcode; - struct rc_sub_instruction copy; - - if (inst->Type != RC_INSTRUCTION_NORMAL) - continue; - - opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) - continue; - - copy = inst->U.I; - - check_opcode_support(c, ©); - - final_rewrite(©); - inst->Type = RC_INSTRUCTION_PAIR; - set_pair_instruction(c, &inst->U.P, ©); - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c deleted file mode 100644 index fe5756e..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program.h" - -#include - -#include "radeon_compiler.h" -#include "radeon_dataflow.h" - - -/** - * Transform the given clause in the following way: - * 1. Replace it with an empty clause - * 2. For every instruction in the original clause, try the given - * transformations in order. - * 3. If one of the transformations returns GL_TRUE, assume that it - * has emitted the appropriate instruction(s) into the new clause; - * otherwise, copy the instruction verbatim. - * - * \note The transformation is currently not recursive; in other words, - * instructions emitted by transformations are not transformed. - * - * \note The transform is called 'local' because it can only look at - * one instruction at a time. 
- */ -void rc_local_transform( - struct radeon_compiler * c, - void *user) -{ - struct radeon_program_transformation *transformations = - (struct radeon_program_transformation*)user; - struct rc_instruction * inst = c->Program.Instructions.Next; - - while(inst != &c->Program.Instructions) { - struct rc_instruction * current = inst; - int i; - - inst = inst->Next; - - for(i = 0; transformations[i].function; ++i) { - struct radeon_program_transformation* t = transformations + i; - - if (t->function(c, current, t->userData)) - break; - } - } -} - -struct get_used_temporaries_data { - unsigned char * Used; - unsigned int UsedLength; -}; - -static void get_used_temporaries_cb( - void * userdata, - struct rc_instruction * inst, - rc_register_file file, - unsigned int index, - unsigned int mask) -{ - struct get_used_temporaries_data * d = userdata; - - if (file != RC_FILE_TEMPORARY) - return; - - if (index >= d->UsedLength) - return; - - d->Used[index] |= mask; -} - -/** - * This function fills in the parameter 'used' with a writemask that - * represent which components of each temporary register are used by the - * program. This is meant to be combined with rc_find_free_temporary_list as a - * more efficient version of rc_find_free_temporary. - * @param used The function does not initialize this parameter. 
- */ -void rc_get_used_temporaries( - struct radeon_compiler * c, - unsigned char * used, - unsigned int used_length) -{ - struct rc_instruction * inst; - struct get_used_temporaries_data d; - d.Used = used; - d.UsedLength = used_length; - - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; inst = inst->Next) { - - rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d); - rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d); - } -} - -/* Search a list of used temporaries for a free one - * \sa rc_get_used_temporaries - * @note If this functions finds a free temporary, it will mark it as used - * in the used temporary list (param 'used') - * @param used list of used temporaries - * @param used_length number of items in param 'used' - * @param mask which components must be free in the temporary index that is - * returned. - * @return -1 If there are no more free temporaries, otherwise the index of - * a temporary register where the components specified in param 'mask' are - * not being used. 
- */ -int rc_find_free_temporary_list( - struct radeon_compiler * c, - unsigned char * used, - unsigned int used_length, - unsigned int mask) -{ - int i; - for(i = 0; i < used_length; i++) { - if ((~used[i] & mask) == mask) { - used[i] |= mask; - return i; - } - } - return -1; -} - -unsigned int rc_find_free_temporary(struct radeon_compiler * c) -{ - unsigned char used[RC_REGISTER_MAX_INDEX]; - int free; - - memset(used, 0, sizeof(used)); - - rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX); - - free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX, - RC_MASK_XYZW); - if (free < 0) { - rc_error(c, "Ran out of temporary registers\n"); - return 0; - } - return free; -} - - -struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) -{ - struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); - - memset(inst, 0, sizeof(struct rc_instruction)); - - inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; - inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; - - return inst; -} - -void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst) -{ - inst->Prev = after; - inst->Next = after->Next; - - inst->Prev->Next = inst; - inst->Next->Prev = inst; -} - -struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) -{ - struct rc_instruction * inst = rc_alloc_instruction(c); - - rc_insert_instruction(after, inst); - - return inst; -} - -void rc_remove_instruction(struct rc_instruction * inst) -{ - inst->Prev->Next = inst->Next; - inst->Next->Prev = inst->Prev; -} - -/** - * Return the number of instructions in the program. 
- */ -unsigned int rc_recompute_ips(struct radeon_compiler * c) -{ - unsigned int ip = 0; - struct rc_instruction * inst; - - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - inst->IP = ip++; - } - - c->Program.Instructions.IP = 0xcafedead; - - return ip; -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h deleted file mode 100644 index b899ecc..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef __RADEON_PROGRAM_H_ -#define __RADEON_PROGRAM_H_ - -#include -#include - -#include "radeon_opcodes.h" -#include "radeon_code.h" -#include "radeon_program_constants.h" -#include "radeon_program_pair.h" - -struct radeon_compiler; - -struct rc_src_register { - unsigned int File:4; - - /** Negative values may be used for relative addressing. */ - signed int Index:(RC_REGISTER_INDEX_BITS+1); - unsigned int RelAddr:1; - - unsigned int Swizzle:12; - - /** Take the component-wise absolute value */ - unsigned int Abs:1; - - /** Post-Abs negation. */ - unsigned int Negate:4; -}; - -struct rc_dst_register { - unsigned int File:3; - unsigned int Index:RC_REGISTER_INDEX_BITS; - unsigned int WriteMask:4; -}; - -struct rc_presub_instruction { - rc_presubtract_op Opcode; - struct rc_src_register SrcReg[2]; -}; - -/** - * Instructions are maintained by the compiler in a doubly linked list - * of these structures. - * - * This instruction format is intended to be expanded for hardware-specific - * trickery. At different stages of compilation, a different set of - * instruction types may be valid. - */ -struct rc_sub_instruction { - struct rc_src_register SrcReg[3]; - struct rc_dst_register DstReg; - - /** - * Opcode of this instruction, according to \ref rc_opcode enums. - */ - unsigned int Opcode:8; - - /** - * Saturate each value of the result to the range [0,1] or [-1,1], - * according to \ref rc_saturate_mode enums. - */ - unsigned int SaturateMode:2; - - /** - * Writing to the special register RC_SPECIAL_ALU_RESULT - */ - /*@{*/ - unsigned int WriteALUResult:2; - unsigned int ALUResultCompare:3; - /*@}*/ - - /** - * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. - */ - /*@{*/ - /** Source texture unit. */ - unsigned int TexSrcUnit:5; - - /** Source texture target, one of the \ref rc_texture_target enums */ - unsigned int TexSrcTarget:3; - - /** True if tex instruction should do shadow comparison */ - unsigned int TexShadow:1; - - /**R500 Only. 
How to swizzle the result of a TEX lookup*/ - unsigned int TexSwizzle:12; - /*@}*/ - - /** This holds information about the presubtract operation used by - * this instruction. */ - struct rc_presub_instruction PreSub; -}; - -typedef enum { - RC_INSTRUCTION_NORMAL = 0, - RC_INSTRUCTION_PAIR -} rc_instruction_type; - -struct rc_instruction { - struct rc_instruction * Prev; - struct rc_instruction * Next; - - rc_instruction_type Type; - union { - struct rc_sub_instruction I; - struct rc_pair_instruction P; - } U; - - /** - * Warning: IPs are not stable. If you want to use them, - * you need to recompute them at the beginning of each pass - * using \ref rc_recompute_ips - */ - unsigned int IP; -}; - -struct rc_program { - /** - * Instructions.Next points to the first instruction, - * Instructions.Prev points to the last instruction. - */ - struct rc_instruction Instructions; - - /* Long term, we should probably remove InputsRead & OutputsWritten, - * since updating dependent state can be fragile, and they aren't - * actually used very often. */ - uint32_t InputsRead; - uint32_t OutputsWritten; - uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ - - struct rc_constant_list Constants; -}; - -/** - * A transformation that can be passed to \ref rc_local_transform. - * - * The function will be called once for each instruction. - * It has to either emit the appropriate transformed code for the instruction - * and return true, or return false if it doesn't understand the - * instruction. - * - * The function gets passed the userData as last parameter. 
- */ -struct radeon_program_transformation { - int (*function)( - struct radeon_compiler*, - struct rc_instruction*, - void*); - void *userData; -}; - -void rc_local_transform( - struct radeon_compiler *c, - void *user); - -void rc_get_used_temporaries( - struct radeon_compiler * c, - unsigned char * used, - unsigned int used_length); - -int rc_find_free_temporary_list( - struct radeon_compiler * c, - unsigned char * used, - unsigned int used_length, - unsigned int mask); - -unsigned int rc_find_free_temporary(struct radeon_compiler * c); - -struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); -struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); -void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst); -void rc_remove_instruction(struct rc_instruction * inst); - -unsigned int rc_recompute_ips(struct radeon_compiler * c); - -void rc_print_program(const struct rc_program *prog); - -rc_swizzle rc_mask_to_swizzle(unsigned int mask); -#endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c deleted file mode 100644 index 9fc9911..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ /dev/null @@ -1,1154 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * Shareable transformations that transform "special" ALU instructions - * into ALU instructions that are supported by hardware. 
- * - */ - -#include "radeon_program_alu.h" - -#include "radeon_compiler.h" -#include "radeon_compiler_util.h" - - -static struct rc_instruction *emit1( - struct radeon_compiler * c, struct rc_instruction * after, - rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, - struct rc_src_register SrcReg) -{ - struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - - fpi->U.I.Opcode = Opcode; - fpi->U.I.SaturateMode = Saturate; - fpi->U.I.DstReg = DstReg; - fpi->U.I.SrcReg[0] = SrcReg; - return fpi; -} - -static struct rc_instruction *emit2( - struct radeon_compiler * c, struct rc_instruction * after, - rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, - struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) -{ - struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - - fpi->U.I.Opcode = Opcode; - fpi->U.I.SaturateMode = Saturate; - fpi->U.I.DstReg = DstReg; - fpi->U.I.SrcReg[0] = SrcReg0; - fpi->U.I.SrcReg[1] = SrcReg1; - return fpi; -} - -static struct rc_instruction *emit3( - struct radeon_compiler * c, struct rc_instruction * after, - rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, - struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, - struct rc_src_register SrcReg2) -{ - struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - - fpi->U.I.Opcode = Opcode; - fpi->U.I.SaturateMode = Saturate; - fpi->U.I.DstReg = DstReg; - fpi->U.I.SrcReg[0] = SrcReg0; - fpi->U.I.SrcReg[1] = SrcReg1; - fpi->U.I.SrcReg[2] = SrcReg2; - return fpi; -} - -static struct rc_dst_register dstregtmpmask(int index, int mask) -{ - struct rc_dst_register dst = {0}; - dst.File = RC_FILE_TEMPORARY; - dst.Index = index; - dst.WriteMask = mask; - return dst; -} - -static const struct rc_src_register builtin_zero = { - .File = RC_FILE_NONE, - .Index = 0, - .Swizzle = RC_SWIZZLE_0000 -}; -static const struct rc_src_register builtin_one = { - .File = RC_FILE_NONE, - .Index 
= 0, - .Swizzle = RC_SWIZZLE_1111 -}; -static const struct rc_src_register srcreg_undefined = { - .File = RC_FILE_NONE, - .Index = 0, - .Swizzle = RC_SWIZZLE_XYZW -}; - -static struct rc_src_register srcreg(int file, int index) -{ - struct rc_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - return src; -} - -static struct rc_src_register srcregswz(int file, int index, int swz) -{ - struct rc_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - src.Swizzle = swz; - return src; -} - -static struct rc_src_register absolute(struct rc_src_register reg) -{ - struct rc_src_register newreg = reg; - newreg.Abs = 1; - newreg.Negate = RC_MASK_NONE; - return newreg; -} - -static struct rc_src_register negate(struct rc_src_register reg) -{ - struct rc_src_register newreg = reg; - newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; - return newreg; -} - -static struct rc_src_register swizzle(struct rc_src_register reg, - rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) -{ - struct rc_src_register swizzled = reg; - swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); - return swizzled; -} - -static struct rc_src_register swizzle_smear(struct rc_src_register reg, - rc_swizzle x) -{ - return swizzle(reg, x, x, x, x); -} - -static struct rc_src_register swizzle_xxxx(struct rc_src_register reg) -{ - return swizzle_smear(reg, RC_SWIZZLE_X); -} - -static struct rc_src_register swizzle_yyyy(struct rc_src_register reg) -{ - return swizzle_smear(reg, RC_SWIZZLE_Y); -} - -static struct rc_src_register swizzle_zzzz(struct rc_src_register reg) -{ - return swizzle_smear(reg, RC_SWIZZLE_Z); -} - -static struct rc_src_register swizzle_wwww(struct rc_src_register reg) -{ - return swizzle_smear(reg, RC_SWIZZLE_W); -} - -static int is_dst_safe_to_reuse(struct rc_instruction *inst) -{ - const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); - unsigned i; - - assert(info->HasDstReg); - - if (inst->U.I.DstReg.File 
!= RC_FILE_TEMPORARY) - return 0; - - for (i = 0; i < info->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) - return 0; - } - - return 1; -} - -static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, - struct rc_instruction *inst) -{ - unsigned tmp; - - if (is_dst_safe_to_reuse(inst)) - tmp = inst->U.I.DstReg.Index; - else - tmp = rc_find_free_temporary(c); - - return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); -} - -static void transform_ABS(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_src_register src = inst->U.I.SrcReg[0]; - src.Abs = 1; - src.Negate = RC_MASK_NONE; - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); - rc_remove_instruction(inst); -} - -static void transform_CEIL(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - /* Assuming: - * ceil(x) = -floor(-x) - * - * After inlining floor: - * ceil(x) = -(-x-frac(-x)) - * - * After simplification: - * ceil(x) = x+frac(-x) - */ - - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); - emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); - rc_remove_instruction(inst); -} - -static void transform_CLAMP(struct radeon_compiler *c, - struct rc_instruction *inst) -{ - /* CLAMP dst, src, min, max - * into: - * MIN tmp, src, max - * MAX dst, tmp, min - */ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, - inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); - emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); - rc_remove_instruction(inst); -} - -static void transform_DP2(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct 
rc_src_register src0 = inst->U.I.SrcReg[0]; - struct rc_src_register src1 = inst->U.I.SrcReg[1]; - src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); - src0.Swizzle &= ~(63 << (3 * 2)); - src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); - src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); - src1.Swizzle &= ~(63 << (3 * 2)); - src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); - emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); - rc_remove_instruction(inst); -} - -static void transform_DPH(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_src_register src0 = inst->U.I.SrcReg[0]; - src0.Negate &= ~RC_MASK_W; - src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); - rc_remove_instruction(inst); -} - -/** - * [1, src0.y*src1.y, src0.z, src1.w] - * So basically MUL with lotsa swizzling. 
- */ -static void transform_DST(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, - swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), - swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); - rc_remove_instruction(inst); -} - -static void transform_FLR(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); - emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); - rc_remove_instruction(inst); -} - -/** - * Definition of LIT (from ARB_fragment_program): - * - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * The longest path of computation is the one leading to result.z, - * consisting of 5 operations. This implementation of LIT takes - * 5 slots, if the subsequent optimization passes are clever enough - * to pair instructions correctly. 
- */ -static void transform_LIT(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - unsigned int constant; - unsigned int constant_swizzle; - unsigned int temp; - struct rc_src_register srctemp; - - constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); - - if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { - struct rc_instruction * inst_mov; - - inst_mov = emit1(c, inst, - RC_OPCODE_MOV, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); - - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - } - - temp = inst->U.I.DstReg.Index; - srctemp = srcreg(RC_FILE_TEMPORARY, temp); - - /* tmp.x = max(0.0, Src.x); */ - /* tmp.y = max(0.0, Src.y); */ - /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ - emit2(c, inst->Prev, RC_OPCODE_MAX, 0, - dstregtmpmask(temp, RC_MASK_XYW), - inst->U.I.SrcReg[0], - swizzle(srcreg(RC_FILE_CONSTANT, constant), - RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); - emit2(c, inst->Prev, RC_OPCODE_MIN, 0, - dstregtmpmask(temp, RC_MASK_Z), - swizzle_wwww(srctemp), - negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); - - /* tmp.w = Pow(tmp.y, tmp.w) */ - emit1(c, inst->Prev, RC_OPCODE_LG2, 0, - dstregtmpmask(temp, RC_MASK_W), - swizzle_yyyy(srctemp)); - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, - dstregtmpmask(temp, RC_MASK_W), - swizzle_wwww(srctemp), - swizzle_zzzz(srctemp)); - emit1(c, inst->Prev, RC_OPCODE_EX2, 0, - dstregtmpmask(temp, RC_MASK_W), - swizzle_wwww(srctemp)); - - /* tmp.z = (tmp.x > 0) ? 
tmp.w : 0.0 */ - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, - dstregtmpmask(temp, RC_MASK_Z), - negate(swizzle_xxxx(srctemp)), - swizzle_wwww(srctemp), - builtin_zero); - - /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, - dstregtmpmask(temp, RC_MASK_XYW), - swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); - - rc_remove_instruction(inst); -} - -static void transform_LRP(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, - dst, - inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, - inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); - - rc_remove_instruction(inst); -} - -static void transform_POW(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); - struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); - tempdst.WriteMask = RC_MASK_W; - tempsrc.Swizzle = RC_SWIZZLE_WWWW; - - emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); - emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); - - rc_remove_instruction(inst); -} - -static void transform_RSQ(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); -} - -static void transform_SEQ(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - 
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); - - rc_remove_instruction(inst); -} - -static void transform_SFL(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); - rc_remove_instruction(inst); -} - -static void transform_SGE(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); - - rc_remove_instruction(inst); -} - -static void transform_SGT(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); - - rc_remove_instruction(inst); -} - -static void transform_SLE(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); - - rc_remove_instruction(inst); -} - -static void transform_SLT(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, dst.Index), 
builtin_one, builtin_zero); - - rc_remove_instruction(inst); -} - -static void transform_SNE(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); - - rc_remove_instruction(inst); -} - -static void transform_SSG(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - /* result = sign(x) - * - * CMP tmp0, -x, 1, 0 - * CMP tmp1, x, 1, 0 - * ADD result, tmp0, -tmp1; - */ - struct rc_dst_register dst0; - unsigned tmp1; - - /* 0 < x */ - dst0 = try_to_reuse_dst(c, inst); - emit3(c, inst->Prev, RC_OPCODE_CMP, 0, - dst0, - negate(inst->U.I.SrcReg[0]), - builtin_one, - builtin_zero); - - /* x < 0 */ - tmp1 = rc_find_free_temporary(c); - emit3(c, inst->Prev, RC_OPCODE_CMP, 0, - dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), - inst->U.I.SrcReg[0], - builtin_one, - builtin_zero); - - /* Either both are zero, or one of them is one and the other is zero. 
*/ - /* result = tmp0 - tmp1 */ - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, - inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, dst0.Index), - negate(srcreg(RC_FILE_TEMPORARY, tmp1))); - - rc_remove_instruction(inst); -} - -static void transform_SUB(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - inst->U.I.Opcode = RC_OPCODE_ADD; - inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); -} - -static void transform_SWZ(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - inst->U.I.Opcode = RC_OPCODE_MOV; -} - -static void transform_XPD(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, - swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), - swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, - swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), - swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), - negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); - - rc_remove_instruction(inst); -} - - -/** - * Can be used as a transformation for @ref radeonClauseLocalTransform, - * no userData necessary. - * - * Eliminates the following ALU instructions: - * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD - * using: - * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP - * - * Transforms RSQ to Radeon's native RSQ by explicitly setting - * absolute value. - * - * @note should be applicable to R300 and R500 fragment programs. 
- */ -int radeonTransformALU( - struct radeon_compiler * c, - struct rc_instruction* inst, - void* unused) -{ - switch(inst->U.I.Opcode) { - case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; - case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; - case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; - case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; - case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; - case RC_OPCODE_DST: transform_DST(c, inst); return 1; - case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; - case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; - case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; - case RC_OPCODE_POW: transform_POW(c, inst); return 1; - case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; - case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; - case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; - case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; - case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; - case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; - case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; - case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; - case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; - case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; - case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; - case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; - default: - return 0; - } -} - - -static void transform_r300_vertex_ABS(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - /* Note: r500 can take absolute values, but r300 cannot. */ - inst->U.I.Opcode = RC_OPCODE_MAX; - inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; - inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; -} - -static void transform_r300_vertex_CMP(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - /* There is no decent CMP available, so let's rig one up. - * CMP is defined as dst = src0 < 0.0 ? 
src1 : src2 - * The following sequence consumes zero to two temps and two extra slots - * (the second temp and the second slot is consumed by transform_LRP), - * but should be equivalent: - * - * SLT tmp0, src0, 0.0 - * LRP dst, tmp0, src1, src2 - * - * Yes, I know, I'm a mad scientist. ~ C. & M. */ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - - /* SLT tmp0, src0, 0.0 */ - emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dst, - inst->U.I.SrcReg[0], builtin_zero); - - /* LRP dst, tmp0, src1, src2 */ - transform_LRP(c, - emit3(c, inst->Prev, RC_OPCODE_LRP, 0, - inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); - - rc_remove_instruction(inst); -} - -static void transform_r300_vertex_DP2(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_instruction *next_inst = inst->Next; - transform_DP2(c, inst); - next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; -} - -static void transform_r300_vertex_DP3(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_src_register src0 = inst->U.I.SrcReg[0]; - struct rc_src_register src1 = inst->U.I.SrcReg[1]; - src0.Negate &= ~RC_MASK_W; - src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - src1.Negate &= ~RC_MASK_W; - src1.Swizzle &= ~(7 << (3 * 3)); - src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); - rc_remove_instruction(inst); -} - -static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_dst_register dst = try_to_reuse_dst(c, inst); - unsigned constant_swizzle; - int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, - 0.0000000000000000001, - &constant_swizzle); - - /* MOV dst, src */ - dst.WriteMask = RC_MASK_XYZW; - emit1(c, inst->Prev, RC_OPCODE_MOV, 0, - dst, - inst->U.I.SrcReg[0]); - - /* MAX dst.y, src, 0.00...001 */ - emit2(c, inst->Prev, 
RC_OPCODE_MAX, 0, - dstregtmpmask(dst.Index, RC_MASK_Y), - srcreg(RC_FILE_TEMPORARY, dst.Index), - srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); - - inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); -} - -static void transform_r300_vertex_SEQ(struct radeon_compiler *c, - struct rc_instruction *inst) -{ - /* x = y <==> x >= y && y >= x */ - int tmp = rc_find_free_temporary(c); - - /* x <= y */ - emit2(c, inst->Prev, RC_OPCODE_SGE, 0, - dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), - inst->U.I.SrcReg[0], - inst->U.I.SrcReg[1]); - - /* y <= x */ - emit2(c, inst->Prev, RC_OPCODE_SGE, 0, - inst->U.I.DstReg, - inst->U.I.SrcReg[1], - inst->U.I.SrcReg[0]); - - /* x && y = x * y */ - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, - inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp), - srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); - - rc_remove_instruction(inst); -} - -static void transform_r300_vertex_SNE(struct radeon_compiler *c, - struct rc_instruction *inst) -{ - /* x != y <==> x < y || y < x */ - int tmp = rc_find_free_temporary(c); - - /* x < y */ - emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), - inst->U.I.SrcReg[0], - inst->U.I.SrcReg[1]); - - /* y < x */ - emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - inst->U.I.DstReg, - inst->U.I.SrcReg[1], - inst->U.I.SrcReg[0]); - - /* x || y = max(x, y) */ - emit2(c, inst->Prev, RC_OPCODE_MAX, 0, - inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp), - srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); - - rc_remove_instruction(inst); -} - -static void transform_r300_vertex_SGT(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - /* x > y <==> -x < -y */ - inst->U.I.Opcode = RC_OPCODE_SLT; - inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; - inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; -} - -static void transform_r300_vertex_SLE(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - /* x <= y <==> -x >= -y */ - inst->U.I.Opcode = RC_OPCODE_SGE; - 
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; - inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; -} - -static void transform_r300_vertex_SSG(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - /* result = sign(x) - * - * SLT tmp0, 0, x; - * SLT tmp1, x, 0; - * ADD result, tmp0, -tmp1; - */ - struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); - unsigned tmp1; - - /* 0 < x */ - dst0 = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dst0, - builtin_zero, - inst->U.I.SrcReg[0]); - - /* x < 0 */ - tmp1 = rc_find_free_temporary(c); - emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), - inst->U.I.SrcReg[0], - builtin_zero); - - /* Either both are zero, or one of them is one and the other is zero. */ - /* result = tmp0 - tmp1 */ - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, - inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, dst0.Index), - negate(srcreg(RC_FILE_TEMPORARY, tmp1))); - - rc_remove_instruction(inst); -} - -/** - * For use with rc_local_transform, this transforms non-native ALU - * instructions of the r300 up to r500 vertex engine. 
- */ -int r300_transform_vertex_alu( - struct radeon_compiler * c, - struct rc_instruction* inst, - void* unused) -{ - switch(inst->U.I.Opcode) { - case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; - case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; - case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; - case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; - case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; - case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; - case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; - case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; - case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; - case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; - case RC_OPCODE_SEQ: - if (!c->is_r500) { - transform_r300_vertex_SEQ(c, inst); - return 1; - } - return 0; - case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; - case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; - case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; - case RC_OPCODE_SNE: - if (!c->is_r500) { - transform_r300_vertex_SNE(c, inst); - return 1; - } - return 0; - case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; - case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; - case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; - case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; - default: - return 0; - } -} - -static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) -{ - static const float SinCosConsts[2][4] = { - { - 1.273239545, /* 4/PI */ - -0.405284735, /* -4/(PI*PI) */ - 3.141592654, /* PI */ - 0.2225 /* weight */ - }, - { - 0.75, - 0.5, - 0.159154943, /* 1/(2*PI) */ - 6.283185307 /* 2*PI */ - } - }; - int i; - - for(i = 0; i < 2; ++i) - constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); -} - -/** - * Approximate sin(x), where x is clamped to (-pi/2, pi/2). 
- * - * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } - * MAD tmp.x, tmp.y, |src|, tmp.x - * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x - * MAD dest, tmp.y, weight, tmp.x - */ -static void sin_approx( - struct radeon_compiler* c, struct rc_instruction * inst, - struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) -{ - unsigned int tempreg = rc_find_free_temporary(c); - - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), - swizzle_xxxx(src), - srcreg(RC_FILE_CONSTANT, constants[0])); - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), - swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), - absolute(swizzle_xxxx(src)), - swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), - swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), - absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), - negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, - swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), - swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), - swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); -} - -/** - * Translate the trigonometric functions COS, SIN, and SCS - * using only the basic instructions - * MOV, ADD, MUL, MAD, FRC - */ -int r300_transform_trig_simple(struct radeon_compiler* c, - struct rc_instruction* inst, - void* unused) -{ - unsigned int constants[2]; - unsigned int tempreg; - - if (inst->U.I.Opcode != RC_OPCODE_COS && - inst->U.I.Opcode != RC_OPCODE_SIN && - inst->U.I.Opcode != RC_OPCODE_SCS) - return 0; - - tempreg = rc_find_free_temporary(c); - - sincos_constants(c, constants); - - if (inst->U.I.Opcode == RC_OPCODE_COS) { - /* MAD tmp.x, src, 1/(2*PI), 0.75 */ - /* FRC tmp.x, tmp.x */ - /* MAD tmp.z, tmp.x, 2*PI, -PI */ - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), - swizzle_xxxx(inst->U.I.SrcReg[0]), - 
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), - swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), - swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), - swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), - swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), - negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); - - sin_approx(c, inst, inst->U.I.DstReg, - swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), - constants); - } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), - swizzle_xxxx(inst->U.I.SrcReg[0]), - swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), - swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), - swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), - swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), - swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), - negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); - - sin_approx(c, inst, inst->U.I.DstReg, - swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), - constants); - } else { - struct rc_dst_register dst; - - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), - swizzle_xxxx(inst->U.I.SrcReg[0]), - swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), - swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), - srcreg(RC_FILE_TEMPORARY, tempreg)); - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), - srcreg(RC_FILE_TEMPORARY, tempreg), - swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), - 
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); - - dst = inst->U.I.DstReg; - - dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; - sin_approx(c, inst, dst, - swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), - constants); - - dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; - sin_approx(c, inst, dst, - swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), - constants); - } - - rc_remove_instruction(inst); - - return 1; -} - -static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, - struct rc_instruction *inst, - unsigned srctmp) -{ - if (inst->U.I.Opcode == RC_OPCODE_COS) { - emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); - } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, - inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); - } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { - struct rc_dst_register moddst = inst->U.I.DstReg; - - if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { - moddst.WriteMask = RC_MASK_X; - emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, - srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); - } - if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { - moddst.WriteMask = RC_MASK_Y; - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, - srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); - } - } - - rc_remove_instruction(inst); -} - - -/** - * Transform the trigonometric functions COS, SIN, and SCS - * to include pre-scaling by 1/(2*PI) and taking the fractional - * part, so that the input to COS and SIN is always in the range [0,1). - * SCS is replaced by one COS and one SIN instruction. - * - * @warning This transformation implicitly changes the semantics of SIN and COS! 
- */ -int radeonTransformTrigScale(struct radeon_compiler* c, - struct rc_instruction* inst, - void* unused) -{ - static const float RCP_2PI = 0.15915494309189535; - unsigned int temp; - unsigned int constant; - unsigned int constant_swizzle; - - if (inst->U.I.Opcode != RC_OPCODE_COS && - inst->U.I.Opcode != RC_OPCODE_SIN && - inst->U.I.Opcode != RC_OPCODE_SCS) - return 0; - - temp = rc_find_free_temporary(c); - constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); - - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), - swizzle_xxxx(inst->U.I.SrcReg[0]), - srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), - srcreg(RC_FILE_TEMPORARY, temp)); - - r300_transform_SIN_COS_SCS(c, inst, temp); - return 1; -} - -/** - * Transform the trigonometric functions COS, SIN, and SCS - * so that the input to COS and SIN is always in the range [-PI, PI]. - * SCS is replaced by one COS and one SIN instruction. 
- */ -int r300_transform_trig_scale_vertex(struct radeon_compiler *c, - struct rc_instruction *inst, - void *unused) -{ - static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; - unsigned int temp; - unsigned int constant; - - if (inst->U.I.Opcode != RC_OPCODE_COS && - inst->U.I.Opcode != RC_OPCODE_SIN && - inst->U.I.Opcode != RC_OPCODE_SCS) - return 0; - - /* Repeat x in the range [-PI, PI]: - * - * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI - */ - - temp = rc_find_free_temporary(c); - constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); - - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), - swizzle_xxxx(inst->U.I.SrcReg[0]), - srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), - srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), - srcreg(RC_FILE_TEMPORARY, temp)); - emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), - srcreg(RC_FILE_TEMPORARY, temp), - srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), - srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); - - r300_transform_SIN_COS_SCS(c, inst, temp); - return 1; -} - -/** - * Rewrite DDX/DDY instructions to properly work with r5xx shaders. - * The r5xx MDH/MDV instruction provides per-quad partial derivatives. - * It takes the form A*B+C. A and C are set by setting src0. B should be -1. - * - * @warning This explicitly changes the form of DDX and DDY! 
- */ - -int radeonTransformDeriv(struct radeon_compiler* c, - struct rc_instruction* inst, - void* unused) -{ - if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) - return 0; - - inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; - inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; - - return 1; -} - -/** - * IF Temp[0].x -\ - * KILP - > KIL -abs(Temp[0].x) - * ENDIF -/ - * - * This needs to be done in its own pass, because it modifies the instructions - * before and after KILP. - */ -void rc_transform_KILP(struct radeon_compiler * c, void *user) -{ - struct rc_instruction * inst; - for (inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; inst = inst->Next) { - - if (inst->U.I.Opcode != RC_OPCODE_KILP) - continue; - - inst->U.I.Opcode = RC_OPCODE_KIL; - - if (inst->Prev->U.I.Opcode != RC_OPCODE_IF - || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { - inst->U.I.SrcReg[0] = negate(builtin_one); - } else { - - inst->U.I.SrcReg[0] = - negate(absolute(inst->Prev->U.I.SrcReg[0])); - /* Remove IF */ - rc_remove_instruction(inst->Prev); - /* Remove ENDIF */ - rc_remove_instruction(inst->Next); - } - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h deleted file mode 100644 index b5f361e..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef __RADEON_PROGRAM_ALU_H_ -#define __RADEON_PROGRAM_ALU_H_ - -#include "radeon_program.h" - -int radeonTransformALU( - struct radeon_compiler * c, - struct rc_instruction * inst, - void*); - -int r300_transform_vertex_alu( - struct radeon_compiler * c, - struct rc_instruction * inst, - void*); - -int r300_transform_trig_simple( - struct radeon_compiler * c, - struct rc_instruction * inst, - void*); - -int radeonTransformTrigScale( - struct radeon_compiler * c, - struct rc_instruction * inst, - void*); - -int r300_transform_trig_scale_vertex( - struct radeon_compiler *c, - struct rc_instruction *inst, - void*); - -int radeonTransformDeriv( - struct radeon_compiler * c, - struct rc_instruction * inst, - void*); - -void rc_transform_KILP(struct radeon_compiler * c, - void *user); - -#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h deleted file mode 100644 index 2457733..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef RADEON_PROGRAM_CONSTANTS_H -#define RADEON_PROGRAM_CONSTANTS_H - -typedef enum { - RC_SATURATE_NONE = 0, - RC_SATURATE_ZERO_ONE, - RC_SATURATE_MINUS_PLUS_ONE -} rc_saturate_mode; - -typedef enum { - RC_TEXTURE_2D_ARRAY, - RC_TEXTURE_1D_ARRAY, - RC_TEXTURE_CUBE, - RC_TEXTURE_3D, - RC_TEXTURE_RECT, - RC_TEXTURE_2D, - RC_TEXTURE_1D -} rc_texture_target; - -typedef enum { - /** - * Used to indicate unused register descriptions and - * source register that use a constant swizzle. - */ - RC_FILE_NONE = 0, - RC_FILE_TEMPORARY, - - /** - * Input register. - * - * \note The compiler attaches no implicit semantics to input registers. - * Fragment/vertex program specific semantics must be defined explicitly - * using the appropriate compiler interfaces. - */ - RC_FILE_INPUT, - - /** - * Output register. - * - * \note The compiler attaches no implicit semantics to input registers. - * Fragment/vertex program specific semantics must be defined explicitly - * using the appropriate compiler interfaces. - */ - RC_FILE_OUTPUT, - RC_FILE_ADDRESS, - - /** - * Indicates a constant from the \ref rc_constant_list . - */ - RC_FILE_CONSTANT, - - /** - * Indicates a special register, see RC_SPECIAL_xxx. - */ - RC_FILE_SPECIAL, - - /** - * Indicates this register should use the result of the presubtract - * operation. 
- */ - RC_FILE_PRESUB -} rc_register_file; - -enum { - /** R500 fragment program ALU result "register" */ - RC_SPECIAL_ALU_RESULT = 0, - - /** Must be last */ - RC_NUM_SPECIAL_REGISTERS -}; - -#define RC_REGISTER_INDEX_BITS 10 -#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) - -typedef enum { - RC_SWIZZLE_X = 0, - RC_SWIZZLE_Y, - RC_SWIZZLE_Z, - RC_SWIZZLE_W, - RC_SWIZZLE_ZERO, - RC_SWIZZLE_ONE, - RC_SWIZZLE_HALF, - RC_SWIZZLE_UNUSED -} rc_swizzle; - -#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) -#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) -#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) -#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) -#define SET_SWZ(swz, idx, newv) \ - do { \ - (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ - } while(0) - -#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) -#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) -#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z) -#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) -#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) -#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) -#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) -#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) -#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) -#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) -#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED) - -/** - * \name Bitmasks for components of vectors. - * - * Used for write masks, negation masks, etc. 
- */ -/*@{*/ -#define RC_MASK_NONE 0 -#define RC_MASK_X 1 -#define RC_MASK_Y 2 -#define RC_MASK_Z 4 -#define RC_MASK_W 8 -#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) -#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) -#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) -#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) -/*@}*/ - -typedef enum { - RC_ALURESULT_NONE = 0, - RC_ALURESULT_X, - RC_ALURESULT_W -} rc_write_aluresult; - -typedef enum { - RC_PRESUB_NONE = 0, - - /** 1 - 2 * src0 */ - RC_PRESUB_BIAS, - - /** src1 - src0 */ - RC_PRESUB_SUB, - - /** src1 + src0 */ - RC_PRESUB_ADD, - - /** 1 - src0 */ - RC_PRESUB_INV -} rc_presubtract_op; - -static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ - switch(op){ - case RC_PRESUB_BIAS: - case RC_PRESUB_INV: - return 1; - case RC_PRESUB_ADD: - case RC_PRESUB_SUB: - return 2; - default: - return 0; - } -} - -#define RC_SOURCE_NONE 0x0 -#define RC_SOURCE_RGB 0x1 -#define RC_SOURCE_ALPHA 0x2 - -#endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c deleted file mode 100644 index 5231595..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (C) 2008-2009 Nicolai Haehnle. - * - * All Rights Reserved. 
 - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program_pair.h" - -#include "radeon_compiler_util.h" - -#include <stdlib.h> - -/** - * Return the source slot where we installed the given register access, - * or -1 if no slot was free anymore. - */ -int rc_pair_alloc_source(struct rc_pair_instruction *pair, - unsigned int rgb, unsigned int alpha, - rc_register_file file, unsigned int index) -{ - int candidate = -1; - int candidate_quality = -1; - unsigned int alpha_used = 0; - unsigned int rgb_used = 0; - int i; - - if ((!rgb && !alpha) || file == RC_FILE_NONE) - return 0; - - /* Make sure only one presubtract operation is used per instruction.
*/ - if (file == RC_FILE_PRESUB) { - if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used - && index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { - return -1; - } - - if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used - && index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { - return -1; - } - } - - for(i = 0; i < 3; ++i) { - int q = 0; - if (rgb) { - if (pair->RGB.Src[i].Used) { - if (pair->RGB.Src[i].File != file || - pair->RGB.Src[i].Index != index) { - rgb_used++; - continue; - } - q++; - } - } - if (alpha) { - if (pair->Alpha.Src[i].Used) { - if (pair->Alpha.Src[i].File != file || - pair->Alpha.Src[i].Index != index) { - alpha_used++; - continue; - } - q++; - } - } - if (q > candidate_quality) { - candidate_quality = q; - candidate = i; - } - } - - if (file == RC_FILE_PRESUB) { - candidate = RC_PAIR_PRESUB_SRC; - } else if (candidate < 0 || (rgb && rgb_used > 2) - || (alpha && alpha_used > 2)) { - return -1; - } - - /* candidate >= 0 */ - - if (rgb) { - pair->RGB.Src[candidate].Used = 1; - pair->RGB.Src[candidate].File = file; - pair->RGB.Src[candidate].Index = index; - if (candidate == RC_PAIR_PRESUB_SRC) { - /* For registers with the RC_FILE_PRESUB file, - * the index stores the presubtract op. */ - int src_regs = rc_presubtract_src_reg_count(index); - for(i = 0; i < src_regs; i++) { - pair->RGB.Src[i].Used = 1; - } - } - } - if (alpha) { - pair->Alpha.Src[candidate].Used = 1; - pair->Alpha.Src[candidate].File = file; - pair->Alpha.Src[candidate].Index = index; - if (candidate == RC_PAIR_PRESUB_SRC) { - /* For registers with the RC_FILE_PRESUB file, - * the index stores the presubtract op. 
*/ - int src_regs = rc_presubtract_src_reg_count(index); - for(i=0; i < src_regs; i++) { - pair->Alpha.Src[i].Used = 1; - } - } - } - - return candidate; -} - -static void pair_foreach_source_callback( - struct rc_pair_instruction * pair, - void * data, - rc_pair_foreach_src_fn cb, - unsigned int swz, - unsigned int src) -{ - /* swz > 3 means that the swizzle is either not used, or a constant - * swizzle (e.g. 0, 1, 0.5). */ - if(swz > 3) - return; - - if(swz == RC_SWIZZLE_W) { - if (src == RC_PAIR_PRESUB_SRC) { - unsigned int i; - unsigned int src_count = rc_presubtract_src_reg_count( - pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); - for(i = 0; i < src_count; i++) { - cb(data, &pair->Alpha.Src[i]); - } - } else { - cb(data, &pair->Alpha.Src[src]); - } - } else { - if (src == RC_PAIR_PRESUB_SRC) { - unsigned int i; - unsigned int src_count = rc_presubtract_src_reg_count( - pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index); - for(i = 0; i < src_count; i++) { - cb(data, &pair->RGB.Src[i]); - } - } - else { - cb(data, &pair->RGB.Src[src]); - } - } -} - -void rc_pair_foreach_source_that_alpha_reads( - struct rc_pair_instruction * pair, - void * data, - rc_pair_foreach_src_fn cb) -{ - unsigned int i; - const struct rc_opcode_info * info = - rc_get_opcode_info(pair->Alpha.Opcode); - for(i = 0; i < info->NumSrcRegs; i++) { - pair_foreach_source_callback(pair, data, cb, - GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0), - pair->Alpha.Arg[i].Source); - } -} - -void rc_pair_foreach_source_that_rgb_reads( - struct rc_pair_instruction * pair, - void * data, - rc_pair_foreach_src_fn cb) -{ - unsigned int i; - const struct rc_opcode_info * info = - rc_get_opcode_info(pair->RGB.Opcode); - for(i = 0; i < info->NumSrcRegs; i++) { - unsigned int chan; - unsigned int swz = RC_SWIZZLE_UNUSED; - /* Find a swizzle that is either X,Y,Z,or W. We assume here - * that if one channel swizzles X,Y, or Z, then none of the - * other channels swizzle W, and vice-versa. 
*/ - for(chan = 0; chan < 4; chan++) { - swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan); - if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y - || swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W) - continue; - } - pair_foreach_source_callback(pair, data, cb, - swz, - pair->RGB.Arg[i].Source); - } -} - -struct rc_pair_instruction_source * rc_pair_get_src( - struct rc_pair_instruction * pair_inst, - struct rc_pair_instruction_arg * arg) -{ - unsigned int type; - - type = rc_source_type_swz(arg->Swizzle); - - if (type & RC_SOURCE_RGB) { - return &pair_inst->RGB.Src[arg->Source]; - } else if (type & RC_SOURCE_ALPHA) { - return &pair_inst->Alpha.Src[arg->Source]; - } else { - return NULL; - } -} - -int rc_pair_get_src_index( - struct rc_pair_instruction * pair_inst, - struct rc_pair_instruction_source * src) -{ - int i; - for (i = 0; i < 3; i++) { - if (&pair_inst->RGB.Src[i] == src - || &pair_inst->Alpha.Src[i] == src) { - return i; - } - } - return -1; -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h deleted file mode 100644 index a957ea9..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_PAIR_H_ -#define __RADEON_PROGRAM_PAIR_H_ - -#include "radeon_code.h" -#include "radeon_opcodes.h" -#include "radeon_program_constants.h" - -struct radeon_compiler; - - -/** - * \file - * Represents a paired ALU instruction, as found in R300 and R500 - * fragment programs. - * - * Note that this representation is taking some liberties as far - * as register files are concerned, to allow separate register - * allocation. - * - * Also note that there are some subtleties in that the semantics - * of certain opcodes are implicitly changed in this representation; - * see \ref rc_pair_translate - */ - -/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then - * the presubtract value will be used, and - * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB. 
/* Index of the source slot reserved for the presubtract result. */
#define RC_PAIR_PRESUB_SRC 3

/**
 * One source slot of a sub-instruction.
 */
struct rc_pair_instruction_source {
	unsigned int Used:1;	/* non-zero once the slot has been allocated */
	unsigned int File:3;	/* register file of the source */
	/* Register index; for RC_FILE_PRESUB sources this holds the
	 * presubtract op instead. */
	unsigned int Index:RC_REGISTER_INDEX_BITS;
};

/**
 * One argument of a sub-instruction, referring to a source slot.
 */
struct rc_pair_instruction_arg {
	unsigned int Source:2;	/* index into Src[]; may be RC_PAIR_PRESUB_SRC */
	unsigned int Swizzle:12;	/* per-channel swizzle, read with GET_SWZ */
	unsigned int Abs:1;	/* take the absolute value of the source */
	unsigned int Negate:1;	/* negate the source */
};

/**
 * The RGB or the Alpha half of a paired instruction.
 */
struct rc_pair_sub_instruction {
	unsigned int Opcode:8;
	unsigned int DestIndex:RC_REGISTER_INDEX_BITS;	/* destination temporary */
	unsigned int WriteMask:4;	/* channels written to the temporary */
	unsigned int Target:2;	/* color output index */
	unsigned int OutputWriteMask:3;	/* channels written to the color output */
	unsigned int DepthWriteMask:1;
	unsigned int Saturate:1;

	/* Slots 0-2 are regular sources, slot RC_PAIR_PRESUB_SRC is the
	 * presubtract result. */
	struct rc_pair_instruction_source Src[4];
	struct rc_pair_instruction_arg Arg[3];
};

/**
 * A paired instruction: an RGB and an Alpha sub-instruction plus the
 * ALU-result state shared by both.
 */
struct rc_pair_instruction {
	struct rc_pair_sub_instruction RGB;
	struct rc_pair_sub_instruction Alpha;

	unsigned int WriteALUResult:2;	/* RC_ALURESULT_X, RC_ALURESULT_W, or 0 for none */
	unsigned int ALUResultCompare:3;	/* compare function applied to the ALU result */
	unsigned int Nop:1;	/* NOTE(review): meaning not visible here -- confirm */
};

/* Callback type used by the rc_pair_foreach_source_* iterators: receives the
 * caller's opaque pointer and one source slot. */
typedef void (*rc_pair_foreach_src_fn)
			(void *, struct rc_pair_instruction_source *);

/**
 * General helper functions for dealing with the paired instruction format.
 */
/*@{*/
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
	unsigned int rgb, unsigned int alpha,
	rc_register_file file, unsigned int index);

void rc_pair_foreach_source_that_alpha_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb);

void rc_pair_foreach_source_that_rgb_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb);

struct rc_pair_instruction_source * rc_pair_get_src(
	struct rc_pair_instruction * pair_inst,
	struct rc_pair_instruction_arg * arg);

int rc_pair_get_src_index(
	struct rc_pair_instruction * pair_inst,
	struct rc_pair_instruction_source * src);
/*@}*/
- */ -/*@{*/ -struct radeon_pair_handler; - -void rc_pair_translate(struct radeon_compiler *cc, void *user); -void rc_pair_schedule(struct radeon_compiler *cc, void *user); -void rc_pair_regalloc(struct radeon_compiler *cc, void *user); -void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user); -void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user); -/*@}*/ - -#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c deleted file mode 100644 index 390d131..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright 2009 Nicolai Hähnle - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "radeon_program.h" - -#include - -static const char * textarget_to_string(rc_texture_target target) -{ - switch(target) { - case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; - case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; - case RC_TEXTURE_CUBE: return "CUBE"; - case RC_TEXTURE_3D: return "3D"; - case RC_TEXTURE_RECT: return "RECT"; - case RC_TEXTURE_2D: return "2D"; - case RC_TEXTURE_1D: return "1D"; - default: return "BAD_TEXTURE_TARGET"; - } -} - -static const char * presubtract_op_to_string(rc_presubtract_op op) -{ - switch(op) { - case RC_PRESUB_NONE: - return "NONE"; - case RC_PRESUB_BIAS: - return "(1 - 2 * src0)"; - case RC_PRESUB_SUB: - return "(src1 - src0)"; - case RC_PRESUB_ADD: - return "(src1 + src0)"; - case RC_PRESUB_INV: - return "(1 - src0)"; - default: - return "BAD_PRESUBTRACT_OP"; - } -} - -static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs) -{ - if (func == RC_COMPARE_FUNC_NEVER) { - fprintf(f, "false"); - } else if (func == RC_COMPARE_FUNC_ALWAYS) { - fprintf(f, "true"); - } else { - const char * op; - switch(func) { - case RC_COMPARE_FUNC_LESS: op = "<"; break; - case RC_COMPARE_FUNC_EQUAL: op = "=="; break; - case RC_COMPARE_FUNC_LEQUAL: op = "<="; break; - case RC_COMPARE_FUNC_GREATER: op = ">"; break; - case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break; - case RC_COMPARE_FUNC_GEQUAL: op = ">="; break; - default: op = "???"; break; - } - fprintf(f, "%s %s %s", lhs, op, rhs); - } -} - -static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) -{ - if (file == RC_FILE_NONE) { - fprintf(f, "none"); - } else if (file == RC_FILE_SPECIAL) { - switch(index) { - case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break; - default: fprintf(f, "special[%i]", index); break; - } - } else { - const char * filename; - switch(file) { - case RC_FILE_TEMPORARY: filename = "temp"; break; - case RC_FILE_INPUT: filename = "input"; break; - case RC_FILE_OUTPUT: 
filename = "output"; break; - case RC_FILE_ADDRESS: filename = "addr"; break; - case RC_FILE_CONSTANT: filename = "const"; break; - default: filename = "BAD FILE"; break; - } - fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); - } -} - -static void rc_print_mask(FILE * f, unsigned int mask) -{ - if (mask & RC_MASK_X) fprintf(f, "x"); - if (mask & RC_MASK_Y) fprintf(f, "y"); - if (mask & RC_MASK_Z) fprintf(f, "z"); - if (mask & RC_MASK_W) fprintf(f, "w"); -} - -static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) -{ - rc_print_register(f, dst.File, dst.Index, 0); - if (dst.WriteMask != RC_MASK_XYZW) { - fprintf(f, "."); - rc_print_mask(f, dst.WriteMask); - } -} - -static char rc_swizzle_char(unsigned int swz) -{ - switch(swz) { - case RC_SWIZZLE_X: return 'x'; - case RC_SWIZZLE_Y: return 'y'; - case RC_SWIZZLE_Z: return 'z'; - case RC_SWIZZLE_W: return 'w'; - case RC_SWIZZLE_ZERO: return '0'; - case RC_SWIZZLE_ONE: return '1'; - case RC_SWIZZLE_HALF: return 'H'; - case RC_SWIZZLE_UNUSED: return '_'; - } - fprintf(stderr, "bad swz: %u\n", swz); - return '?'; -} - -static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) -{ - unsigned int comp; - for(comp = 0; comp < 4; ++comp) { - rc_swizzle swz = GET_SWZ(swizzle, comp); - if (GET_BIT(negate, comp)) - fprintf(f, "-"); - fprintf(f, "%c", rc_swizzle_char(swz)); - } -} - -static void rc_print_presub_instruction(FILE * f, - struct rc_presub_instruction inst) -{ - fprintf(f,"("); - switch(inst.Opcode){ - case RC_PRESUB_BIAS: - fprintf(f, "1 - 2 * "); - rc_print_register(f, inst.SrcReg[0].File, - inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); - break; - case RC_PRESUB_SUB: - rc_print_register(f, inst.SrcReg[1].File, - inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); - fprintf(f, " - "); - rc_print_register(f, inst.SrcReg[0].File, - inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); - break; - case RC_PRESUB_ADD: - rc_print_register(f, inst.SrcReg[1].File, - 
inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr); - fprintf(f, " + "); - rc_print_register(f, inst.SrcReg[0].File, - inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); - break; - case RC_PRESUB_INV: - fprintf(f, "1 - "); - rc_print_register(f, inst.SrcReg[0].File, - inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr); - break; - default: - break; - } - fprintf(f, ")"); -} - -static void rc_print_src_register(FILE * f, struct rc_instruction * inst, - struct rc_src_register src) -{ - int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); - - if (src.Negate == RC_MASK_XYZW) - fprintf(f, "-"); - if (src.Abs) - fprintf(f, "|"); - - if(src.File == RC_FILE_PRESUB) - rc_print_presub_instruction(f, inst->U.I.PreSub); - else - rc_print_register(f, src.File, src.Index, src.RelAddr); - - if (src.Abs && !trivial_negate) - fprintf(f, "|"); - - if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { - fprintf(f, "."); - rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); - } - - if (src.Abs && trivial_negate) - fprintf(f, "|"); -} - -static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth) -{ - switch (opcode) { - case RC_OPCODE_IF: - case RC_OPCODE_BGNLOOP: - return (*branch_depth)++ * 2; - - case RC_OPCODE_ENDIF: - case RC_OPCODE_ENDLOOP: - assert(*branch_depth > 0); - return --(*branch_depth) * 2; - - case RC_OPCODE_ELSE: - assert(*branch_depth > 0); - return (*branch_depth - 1) * 2; - - default: - return *branch_depth * 2; - } -} - -static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - unsigned int reg; - unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth); - - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); - - fprintf(f, "%s", opcode->Name); - - switch(inst->U.I.SaturateMode) { - case RC_SATURATE_NONE: break; - case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; - 
case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; - default: fprintf(f, "_BAD_SAT"); break; - } - - if (opcode->HasDstReg) { - fprintf(f, " "); - rc_print_dst_register(f, inst->U.I.DstReg); - if (opcode->NumSrcRegs) - fprintf(f, ","); - } - - for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { - if (reg > 0) - fprintf(f, ","); - fprintf(f, " "); - rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]); - } - - if (opcode->HasTexture) { - fprintf(f, ", %s%s[%u]", - textarget_to_string(inst->U.I.TexSrcTarget), - inst->U.I.TexShadow ? "SHADOW" : "", - inst->U.I.TexSrcUnit); - } - - fprintf(f, ";"); - - if (inst->U.I.WriteALUResult) { - fprintf(f, " [aluresult = ("); - rc_print_comparefunc(f, - (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", - inst->U.I.ALUResultCompare, "0"); - fprintf(f, ")]"); - } - - fprintf(f, "\n"); -} - -static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth) -{ - struct rc_pair_instruction * inst = &fullinst->U.P; - int printedsrc = 0; - unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ? 
- inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth); - - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); - - for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - if (printedsrc) - fprintf(f, ", "); - fprintf(f, "src%i.xyz = ", src); - rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); - printedsrc = 1; - } - if (inst->Alpha.Src[src].Used) { - if (printedsrc) - fprintf(f, ", "); - fprintf(f, "src%i.w = ", src); - rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); - printedsrc = 1; - } - } - if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - fprintf(f, ", srcp.xyz = %s", - presubtract_op_to_string( - inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index)); - } - if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - fprintf(f, ", srcp.w = %s", - presubtract_op_to_string( - inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index)); - } - fprintf(f, "\n"); - - if (inst->RGB.Opcode != RC_OPCODE_NOP) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); - - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); - - fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); - if (inst->RGB.WriteMask) - fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, - (inst->RGB.WriteMask & 1) ? "x" : "", - (inst->RGB.WriteMask & 2) ? "y" : "", - (inst->RGB.WriteMask & 4) ? "z" : ""); - if (inst->RGB.OutputWriteMask) - fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, - (inst->RGB.OutputWriteMask & 1) ? "x" : "", - (inst->RGB.OutputWriteMask & 2) ? "y" : "", - (inst->RGB.OutputWriteMask & 4) ? "z" : ""); - if (inst->WriteALUResult == RC_ALURESULT_X) - fprintf(f, " aluresult"); - - for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { - const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; - const char* neg = inst->RGB.Arg[arg].Negate ? 
"-" : ""; - fprintf(f, ", %s%ssrc", neg, abs); - if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC) - fprintf(f,"p"); - else - fprintf(f,"%d", inst->RGB.Arg[arg].Source); - fprintf(f,".%c%c%c%s", - rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), - rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), - rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), - abs); - } - fprintf(f, "\n"); - } - - if (inst->Alpha.Opcode != RC_OPCODE_NOP) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); - - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); - - fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); - if (inst->Alpha.WriteMask) - fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); - if (inst->Alpha.OutputWriteMask) - fprintf(f, " color[%i].w", inst->Alpha.Target); - if (inst->Alpha.DepthWriteMask) - fprintf(f, " depth.w"); - if (inst->WriteALUResult == RC_ALURESULT_W) - fprintf(f, " aluresult"); - - for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { - const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; - const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; - fprintf(f, ", %s%ssrc", neg, abs); - if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC) - fprintf(f,"p"); - else - fprintf(f,"%d", inst->Alpha.Arg[arg].Source); - fprintf(f,".%c%s", - rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs); - } - fprintf(f, "\n"); - } - - if (inst->WriteALUResult) { - for (unsigned i = 0; i < spaces; i++) - fprintf(f, " "); - - fprintf(f, " [aluresult = ("); - rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); - fprintf(f, ")]\n"); - } -} - -/** - * Print program to stderr, default options. 
- */ -void rc_print_program(const struct rc_program *prog) -{ - unsigned int linenum = 0; - unsigned branch_depth = 0; - struct rc_instruction *inst; - - fprintf(stderr, "# Radeon Compiler Program\n"); - - for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { - fprintf(stderr, "%3d: ", linenum); - - if (inst->Type == RC_INSTRUCTION_PAIR) - rc_print_pair_instruction(stderr, inst, &branch_depth); - else - rc_print_normal_instruction(stderr, inst, &branch_depth); - - linenum++; - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c deleted file mode 100644 index 8d16b2c..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - * Copyright (C) 2010 Corbin Simpson - * Copyright (C) 2010 Marek Olšák - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program_tex.h" - -#include "radeon_compiler_util.h" - -/* Series of transformations to be done on textures. */ - -static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler, - int tmu) -{ - struct rc_src_register reg = { 0, }; - - if (compiler->enable_shadow_ambient) { - reg.File = RC_FILE_CONSTANT; - reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, - RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = RC_SWIZZLE_WWWW; - } else { - reg.File = RC_FILE_NONE; - reg.Swizzle = RC_SWIZZLE_0000; - } - - reg.Swizzle = combine_swizzles(reg.Swizzle, - compiler->state.unit[tmu].texture_swizzle); - return reg; -} - -static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler, - int tmu) -{ - struct rc_src_register reg = { 0, }; - - reg.File = RC_FILE_NONE; - reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, - compiler->state.unit[tmu].texture_swizzle); - return reg; -} - -static void scale_texcoords(struct r300_fragment_program_compiler *compiler, - struct rc_instruction *inst, - unsigned state_constant) -{ - struct rc_instruction *inst_mov; - - unsigned temp = rc_find_free_temporary(&compiler->Base); - - inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev); - - inst_mov->U.I.Opcode = RC_OPCODE_MUL; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mov->U.I.SrcReg[1].Index = - rc_constants_add_state(&compiler->Base.Program.Constants, - state_constant, inst->U.I.TexSrcUnit); - - reset_srcreg(&inst->U.I.SrcReg[0]); - 
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = temp; -} - -static void projective_divide(struct r300_fragment_program_compiler *compiler, - struct rc_instruction *inst) -{ - struct rc_instruction *inst_mul, *inst_rcp; - - unsigned temp = rc_find_free_temporary(&compiler->Base); - - inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev); - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = temp; - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - /* Because the input can be arbitrarily swizzled, - * read the component mapped to W. */ - inst_rcp->U.I.SrcReg[0].Swizzle = - RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); - - inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev); - inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = temp; - inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mul->U.I.SrcReg[1].Index = temp; - inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.Opcode = RC_OPCODE_TEX; - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = temp; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following ways: - * - implement texture compare (shadow extensions) - * - extract non-native source / destination operands - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - */ -int radeonTransformTEX( - struct radeon_compiler * c, - struct rc_instruction * inst, - void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; - int is_rect = 
inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || - compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords; - - if (inst->U.I.Opcode != RC_OPCODE_TEX && - inst->U.I.Opcode != RC_OPCODE_TXB && - inst->U.I.Opcode != RC_OPCODE_TXP && - inst->U.I.Opcode != RC_OPCODE_TXD && - inst->U.I.Opcode != RC_OPCODE_TXL && - inst->U.I.Opcode != RC_OPCODE_KIL) - return 0; - - /* ARB_shadow & EXT_shadow_funcs */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || - (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - - if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.Opcode = RC_OPCODE_MOV; - - if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); - } else { - inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); - } - - return 1; - } else { - struct rc_instruction * inst_rcp = NULL; - struct rc_instruction *inst_mul, *inst_add, *inst_cmp; - unsigned tmp_texsample; - unsigned tmp_sum; - int pass, fail; - - /* Save the output register. */ - struct rc_dst_register output_reg = inst->U.I.DstReg; - unsigned saturate_mode = inst->U.I.SaturateMode; - - /* Redirect TEX to a new temp. */ - tmp_texsample = rc_find_free_temporary(c); - inst->U.I.SaturateMode = 0; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = tmp_texsample; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - - tmp_sum = rc_find_free_temporary(c); - - if (inst->U.I.Opcode == RC_OPCODE_TXP) { - /* Compute 1/W. 
*/ - inst_rcp = rc_insert_new_instruction(c, inst); - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = tmp_sum; - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_rcp->U.I.SrcReg[0].Swizzle = - RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); - } - - /* Divide Z by W (if it's TXP) and saturate. */ - inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); - inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = tmp_sum; - inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; - inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; - inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mul->U.I.SrcReg[0].Swizzle = - RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); - if (inst->U.I.Opcode == RC_OPCODE_TXP) { - inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mul->U.I.SrcReg[1].Index = tmp_sum; - inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - } - - /* Add the depth texture value. 
*/ - inst_add = rc_insert_new_instruction(c, inst_mul); - inst_add->U.I.Opcode = RC_OPCODE_ADD; - inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_add->U.I.DstReg.Index = tmp_sum; - inst_add->U.I.DstReg.WriteMask = RC_MASK_W; - inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_add->U.I.SrcReg[0].Index = tmp_sum; - inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_add->U.I.SrcReg[1].Index = tmp_texsample; - inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; - - /* Note that SrcReg[0] is r, SrcReg[1] is tex and: - * LESS: r < tex <=> -tex+r < 0 - * GEQUAL: r >= tex <=> not (-tex+r < 0) - * GREATER: r > tex <=> tex-r < 0 - * LEQUAL: r <= tex <=> not ( tex-r < 0) - * EQUAL: GEQUAL - * NOTEQUAL:LESS - */ - - /* This negates either r or tex: */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || - comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) - inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; - else - inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - - /* This negates the whole expresion: */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || - comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - inst_cmp = rc_insert_new_instruction(c, inst_add); - inst_cmp->U.I.Opcode = RC_OPCODE_CMP; - inst_cmp->U.I.SaturateMode = saturate_mode; - inst_cmp->U.I.DstReg = output_reg; - inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cmp->U.I.SrcReg[0].Index = tmp_sum; - inst_cmp->U.I.SrcReg[0].Swizzle = - combine_swizzles(RC_SWIZZLE_WWWW, - compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle); - inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit); - inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit); - - assert(tmp_texsample != 
tmp_sum); - } - } - - /* R300 cannot sample from rectangles and the wrap mode fallback needs - * normalized coordinates anyway. */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) { - scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR); - inst->U.I.TexSrcTarget = RC_TEXTURE_2D; - } - - /* Divide by W if needed. */ - if (inst->U.I.Opcode == RC_OPCODE_TXP && - (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT || - compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) { - projective_divide(compiler, inst); - } - - /* Texture wrap modes don't work on NPOT textures. - * - * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and - * mirroring are not. If we need to repeat, we do: - * - * MUL temp, texcoord, - * FRC temp, temp ; Discard integer portion of coords - * - * This gives us coords in [0, 1]. - * - * Mirroring is trickier. We're going to start out like repeat: - * - * MUL temp, texcoord, ; De-mirror across axes - * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] - * ; so scale to [0, 1] - * FRC temp, temp ; Make the pattern repeat - * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] - * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. - * ; The pattern is backwards, so reverse it (1-x). - * - * This gives us coords in [0, 1]. - * - * ~ C & M. ;) - */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - wrapmode != RC_WRAP_NONE) { - struct rc_instruction *inst_mov; - unsigned temp = rc_find_free_temporary(c); - - if (wrapmode == RC_WRAP_REPEAT) { - /* Both instructions will be paired up. 
*/ - struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); - - inst_frc->U.I.Opcode = RC_OPCODE_FRC; - inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_frc->U.I.DstReg.Index = temp; - inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { - /* - * Function: - * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) - * - * Code: - * MUL temp, src0, 0.5 - * FRC temp, temp - * MAD temp, temp, 2, -1 - * ADD temp, 1, -abs(temp) - */ - - struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; - unsigned two, two_swizzle; - - inst_mul = rc_insert_new_instruction(c, inst->Prev); - - inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = temp; - inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; - - inst_frc = rc_insert_new_instruction(c, inst->Prev); - - inst_frc->U.I.Opcode = RC_OPCODE_FRC; - inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_frc->U.I.DstReg.Index = temp; - inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_frc->U.I.SrcReg[0].Index = temp; - inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; - - two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); - inst_mad = rc_insert_new_instruction(c, inst->Prev); - - inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = temp; - inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[0].Index = temp; - inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; - inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[1].Index = two; - inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; - inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; 
- inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; - - inst_add = rc_insert_new_instruction(c, inst->Prev); - - inst_add->U.I.Opcode = RC_OPCODE_ADD; - inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_add->U.I.DstReg.Index = temp; - inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; - inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_add->U.I.SrcReg[1].Index = temp; - inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; - inst_add->U.I.SrcReg[1].Abs = 1; - inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; - } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { - /* - * Mirrored clamp modes are bloody simple, we just use abs - * to mirror [0, 1] into [-1, 0]. This works for - * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. - */ - struct rc_instruction *inst_mov; - - inst_mov = rc_insert_new_instruction(c, inst->Prev); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mov->U.I.SrcReg[0].Abs = 1; - } - - /* Preserve W for TXP/TXB. */ - inst_mov = rc_insert_new_instruction(c, inst->Prev); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = temp; - } - - /* NPOT -> POT conversion for 3D textures. */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) { - struct rc_instruction *inst_mov; - unsigned temp = rc_find_free_temporary(c); - - /* Saturate XYZ. 
*/ - inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - - /* Copy W. */ - inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = temp; - inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = temp; - - scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); - } - - /* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM. - * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2 - */ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) { - unsigned two, two_swizzle; - struct rc_instruction *inst_mul, *inst_mad, *inst_cnd; - - two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); - - inst_mul = rc_insert_new_instruction(c, inst); - inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */ - inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */ - inst_mul->U.I.SrcReg[1].Index = two; - inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle; - - inst_mad = rc_insert_new_instruction(c, inst_mul); - inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ -
inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */ - inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */ - inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW; - - inst_cnd = rc_insert_new_instruction(c, inst_mad); - inst_cnd->U.I.Opcode = RC_OPCODE_CND; - inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode; - inst_cnd->U.I.DstReg = inst->U.I.DstReg; - inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; - inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index; - inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle; - inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */ - - inst->U.I.SaturateMode = 0; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - } - - /* Cannot write texture to output registers or with saturate (all chips), - * or with masks (non-r500). 
*/ - if (inst->U.I.Opcode != RC_OPCODE_KIL && - (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || - inst->U.I.SaturateMode || - (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; - inst_mov->U.I.DstReg = inst->U.I.DstReg; - inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - - inst->U.I.SaturateMode = 0; - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - } - - /* Cannot read texture coordinate from constants file */ - if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - - reset_srcreg(&inst->U.I.SrcReg[0]); - inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; - } - - return 1; -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h deleted file mode 100644 index a010505..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2010 Corbin Simpson - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_TEX_H_ -#define __RADEON_PROGRAM_TEX_H_ - -#include "radeon_compiler.h" -#include "radeon_program.h" - -int radeonTransformTEX( - struct radeon_compiler * c, - struct rc_instruction * inst, - void* data); - -#endif /* __RADEON_PROGRAM_TEX_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c deleted file mode 100644 index 7d76585..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (C) 2010 Marek Olšák - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "radeon_remove_constants.h" -#include "radeon_dataflow.h" - -struct mark_used_data { - unsigned char * const_used; - unsigned * has_rel_addr; -}; - -static void remap_regs(void * userdata, struct rc_instruction * inst, - rc_register_file * pfile, unsigned int * pindex) -{ - unsigned *inv_remap_table = userdata; - - if (*pfile == RC_FILE_CONSTANT) { - *pindex = inv_remap_table[*pindex]; - } -} - -static void mark_used(void * userdata, struct rc_instruction * inst, - struct rc_src_register * src) -{ - struct mark_used_data * d = userdata; - - if (src->File == RC_FILE_CONSTANT) { - if (src->RelAddr) { - *d->has_rel_addr = 1; - } else { - d->const_used[src->Index] = 1; - } - } -} - -void rc_remove_unused_constants(struct radeon_compiler *c, void *user) -{ - unsigned **out_remap_table = (unsigned**)user; - unsigned char *const_used; - unsigned *remap_table; - unsigned *inv_remap_table; - unsigned has_rel_addr = 0; - unsigned is_identity = 1; - unsigned are_externals_remapped = 0; - struct rc_constant *constants = c->Program.Constants.Constants; - struct mark_used_data d; - unsigned new_count; - - if (!c->Program.Constants.Count) { - *out_remap_table = NULL; - return; - } - - const_used = malloc(c->Program.Constants.Count); - memset(const_used, 0, c->Program.Constants.Count); - - d.const_used = const_used; - d.has_rel_addr = &has_rel_addr; - - /* Pass 1: Mark used constants. */ - for (struct rc_instruction *inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; inst = inst->Next) { - rc_for_all_reads_src(inst, mark_used, &d); - } - - /* Pass 2: If there is relative addressing or dead constant elimination - * is disabled, mark all externals as used. */ - if (has_rel_addr || !c->remove_unused_constants) { - for (unsigned i = 0; i < c->Program.Constants.Count; i++) - if (constants[i].Type == RC_CONSTANT_EXTERNAL) - const_used[i] = 1; - } - - /* Pass 3: Make the remapping table and remap constants. 
- * This pass removes unused constants simply by overwriting them by other constants. */ - remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); - inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); - new_count = 0; - - for (unsigned i = 0; i < c->Program.Constants.Count; i++) { - if (const_used[i]) { - remap_table[new_count] = i; - inv_remap_table[i] = new_count; - - if (i != new_count) { - if (constants[i].Type == RC_CONSTANT_EXTERNAL) - are_externals_remapped = 1; - - constants[new_count] = constants[i]; - is_identity = 0; - } - new_count++; - } - } - - /* is_identity ==> new_count == old_count - * !is_identity ==> new_count < old_count */ - assert( is_identity || new_count < c->Program.Constants.Count); - assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); - - /* Pass 4: Redirect reads of all constants to their new locations. */ - if (!is_identity) { - for (struct rc_instruction *inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; inst = inst->Next) { - rc_remap_registers(inst, remap_regs, inv_remap_table); - } - } - - /* Set the new constant count. Note that new_count may be less than - * Count even though the remapping function is identity. In that case, - * the constants have been removed at the end of the array. */ - c->Program.Constants.Count = new_count; - - if (are_externals_remapped) { - *out_remap_table = remap_table; - } else { - *out_remap_table = NULL; - free(remap_table); - } - - free(const_used); - free(inv_remap_table); - - if (c->Debug & RC_DBG_LOG) - rc_constants_print(&c->Program.Constants); -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h deleted file mode 100644 index f29113b..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2010 Marek Olšák - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef RADEON_REMOVE_CONSTANTS_H -#define RADEON_REMOVE_CONSTANTS_H - -#include "radeon_compiler.h" - -void rc_remove_unused_constants(struct radeon_compiler *c, void *user); - -#endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c deleted file mode 100644 index cafa057..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright 2010 Tom Stellard - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * \file - */ - -#include "radeon_rename_regs.h" - -#include "radeon_compiler.h" -#include "radeon_dataflow.h" -#include "radeon_program.h" - -/** - * This function renames registers in an attempt to get the code close to - * SSA form. After this function has completed, most of the register are only - * written to one time, with a few exceptions. - * - * This function assumes all the instructions are still of type - * RC_INSTRUCTION_NORMAL. - */ -void rc_rename_regs(struct radeon_compiler *c, void *user) -{ - unsigned int i, used_length; - int new_index; - struct rc_instruction * inst; - struct rc_reader_data reader_data; - unsigned char * used; - - /* XXX Remove this once the register allocation works with flow control. 
*/ - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) - return; - } - - used_length = 2 * rc_recompute_ips(c); - used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); - memset(used, 0, sizeof(unsigned char) * used_length); - - rc_get_used_temporaries(c, used, used_length); - for(inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - - if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) - continue; - - reader_data.ExitOnAbort = 1; - rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); - - if (reader_data.Abort || reader_data.ReaderCount == 0) - continue; - - new_index = rc_find_free_temporary_list(c, used, used_length, - RC_MASK_XYZW); - if (new_index < 0) { - rc_error(c, "Ran out of temporary registers\n"); - return; - } - - reader_data.Writer->U.I.DstReg.Index = new_index; - for(i = 0; i < reader_data.ReaderCount; i++) { - reader_data.Readers[i].U.I.Src->Index = new_index; - } - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h deleted file mode 100644 index 3baf29f..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h +++ /dev/null @@ -1,9 +0,0 @@ - -#ifndef RADEON_RENAME_REGS_H -#define RADEON_RENAME_REGS_H - -struct radeon_compiler; - -void rc_rename_regs(struct radeon_compiler *c, void *user); - -#endif /* RADEON_RENAME_REGS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h deleted file mode 100644 index c81d5f7..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef RADEON_SWIZZLE_H -#define RADEON_SWIZZLE_H - -#include "radeon_program.h" - -struct rc_swizzle_split { - unsigned char NumPhases; - unsigned char Phase[4]; -}; - -/** - * Describe the swizzling capability of target hardware. - */ -struct rc_swizzle_caps { - /** - * Check whether the given swizzle, absolute and negate combination - * can be implemented natively by the hardware for this opcode. - * - * \return 1 if the swizzle is native for the given opcode - */ - int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); - - /** - * Determine how to split access to the masked channels of the - * given source register to obtain ALU-native swizzles. 
- */ - void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); -}; - -#endif /* RADEON_SWIZZLE_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c deleted file mode 100644 index 938fb84..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c +++ /dev/null @@ -1,517 +0,0 @@ -/* - * Copyright 2011 Tom Stellard - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_variable.h" - -#include "memory_pool.h" -#include "radeon_compiler_util.h" -#include "radeon_dataflow.h" -#include "radeon_list.h" -#include "radeon_opcodes.h" -#include "radeon_program.h" - -/** - * Rewrite the index and writemask for the destination register of var - * and its friends to new_index and new_writemask. 
This function also takes - * care of rewriting the swizzles for the sources of var. - */ -void rc_variable_change_dst( - struct rc_variable * var, - unsigned int new_index, - unsigned int new_writemask) -{ - struct rc_variable * var_ptr; - struct rc_list * readers; - unsigned int old_mask = rc_variable_writemask_sum(var); - unsigned int conversion_swizzle = - rc_make_conversion_swizzle(old_mask, new_writemask); - - for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { - if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { - rc_normal_rewrite_writemask(var_ptr->Inst, - conversion_swizzle); - var_ptr->Inst->U.I.DstReg.Index = new_index; - } else { - struct rc_pair_sub_instruction * sub; - if (var_ptr->Dst.WriteMask == RC_MASK_W) { - assert(new_writemask & RC_MASK_W); - sub = &var_ptr->Inst->U.P.Alpha; - } else { - sub = &var_ptr->Inst->U.P.RGB; - rc_pair_rewrite_writemask(sub, - conversion_swizzle); - } - sub->DestIndex = new_index; - } - } - - readers = rc_variable_readers_union(var); - - for ( ; readers; readers = readers->Next) { - struct rc_reader * reader = readers->Item; - if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { - reader->U.I.Src->Index = new_index; - reader->U.I.Src->Swizzle = rc_rewrite_swizzle( - reader->U.I.Src->Swizzle, conversion_swizzle); - } else { - struct rc_pair_instruction * pair_inst = - &reader->Inst->U.P; - unsigned int src_type = rc_source_type_swz( - reader->U.P.Arg->Swizzle); - - int src_index = reader->U.P.Arg->Source; - if (src_index == RC_PAIR_PRESUB_SRC) { - src_index = rc_pair_get_src_index( - pair_inst, reader->U.P.Src); - } - /* Try to delete the old src, it is OK if this fails, - * because rc_pair_alloc_source might be able to - * find a source that can be reused. - */ - if (rc_pair_remove_src(reader->Inst, src_type, - src_index, old_mask)) { - /* Reuse the source index of the source that - * was just deleted and set its register - * index.
We can't use rc_pair_alloc_source - * for this because it might return a source - * index that is already being used. */ - if (src_type & RC_SOURCE_RGB) { - pair_inst->RGB.Src[src_index] - .Used = 1; - pair_inst->RGB.Src[src_index] - .Index = new_index; - pair_inst->RGB.Src[src_index] - .File = RC_FILE_TEMPORARY; - } - if (src_type & RC_SOURCE_ALPHA) { - pair_inst->Alpha.Src[src_index] - .Used = 1; - pair_inst->Alpha.Src[src_index] - .Index = new_index; - pair_inst->Alpha.Src[src_index] - .File = RC_FILE_TEMPORARY; - } - } else { - src_index = rc_pair_alloc_source( - &reader->Inst->U.P, - src_type & RC_SOURCE_RGB, - src_type & RC_SOURCE_ALPHA, - RC_FILE_TEMPORARY, - new_index); - if (src_index < 0) { - rc_error(var->C, "Rewrite of inst %u failed " - "Can't allocate source for " - "Inst %u src_type=%x " - "new_index=%u new_mask=%u\n", - var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask); - continue; - } - } - reader->U.P.Arg->Swizzle = rc_rewrite_swizzle( - reader->U.P.Arg->Swizzle, conversion_swizzle); - if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { - reader->U.P.Arg->Source = src_index; - } - } - } -} - -/** - * Compute the live intervals for var and its friends. - */ -void rc_variable_compute_live_intervals(struct rc_variable * var) -{ - while(var) { - unsigned int i; - unsigned int start = var->Inst->IP; - - for (i = 0; i < var->ReaderCount; i++) { - unsigned int chan; - unsigned int chan_start = start; - unsigned int chan_end = var->Readers[i].Inst->IP; - unsigned int mask = var->Readers[i].WriteMask; - struct rc_instruction * inst; - - /* Extend the live interval of T0 to the start of the - * loop for sequences like: - * BGNLOOP - * read T0 - * ...
- * write T0 - * ENDLOOP - */ - if (var->Readers[i].Inst->IP < start) { - struct rc_instruction * bgnloop = - rc_match_endloop(var->Readers[i].Inst); - chan_start = bgnloop->IP; - } - - /* Extend the live interval of T0 to the start of the - * loop in case there is a BRK instruction in the loop - * (we don't actually check for a BRK instruction we - * assume there is one somewhere in the loop, which - * there usually is) for sequences like: - * BGNLOOP - * ... - * conditional BRK - * ... - * write T0 - * ENDLOOP - * read T0 - *************************************************** - * Extend the live interval of T0 to the end of the - * loop for sequences like: - * write T0 - * BGNLOOP - * ... - * read T0 - * ENDLOOP - */ - for (inst = var->Inst; inst != var->Readers[i].Inst; - inst = inst->Next) { - rc_opcode op = rc_get_flow_control_inst(inst); - if (op == RC_OPCODE_ENDLOOP) { - struct rc_instruction * bgnloop = - rc_match_endloop(inst); - if (bgnloop->IP < chan_start) { - chan_start = bgnloop->IP; - } - } else if (op == RC_OPCODE_BGNLOOP) { - struct rc_instruction * endloop = - rc_match_bgnloop(inst); - if (endloop->IP > chan_end) { - chan_end = endloop->IP; - } - } - } - - for (chan = 0; chan < 4; chan++) { - if ((mask >> chan) & 0x1) { - if (!var->Live[chan].Used - || chan_start < var->Live[chan].Start) { - var->Live[chan].Start = - chan_start; - } - if (!var->Live[chan].Used - || chan_end > var->Live[chan].End) { - var->Live[chan].End = chan_end; - } - var->Live[chan].Used = 1; - } - } - } - var = var->Friend; - } -} - -/** - * @return 1 if a and b share a reader - * @return 0 if they do not - */ -static unsigned int readers_intersect( - struct rc_variable * a, - struct rc_variable * b) -{ - unsigned int a_index, b_index; - for (a_index = 0; a_index < a->ReaderCount; a_index++) { - struct rc_reader reader_a = a->Readers[a_index]; - for (b_index = 0; b_index < b->ReaderCount; b_index++) { - struct rc_reader reader_b = b->Readers[b_index]; - if (reader_a.Inst->Type 
== RC_INSTRUCTION_NORMAL - && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL - && reader_a.U.I.Src == reader_b.U.I.Src) { - - return 1; - } - if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR - && reader_b.Inst->Type == RC_INSTRUCTION_PAIR - && reader_a.U.P.Src == reader_b.U.P.Src) { - - return 1; - } - } - } - return 0; -} - -void rc_variable_add_friend( - struct rc_variable * var, - struct rc_variable * friend) -{ - assert(var->Dst.Index == friend->Dst.Index); - while(var->Friend) { - var = var->Friend; - } - var->Friend = friend; -} - -struct rc_variable * rc_variable( - struct radeon_compiler * c, - unsigned int DstFile, - unsigned int DstIndex, - unsigned int DstWriteMask, - struct rc_reader_data * reader_data) -{ - struct rc_variable * new = - memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); - memset(new, 0, sizeof(struct rc_variable)); - new->C = c; - new->Dst.File = DstFile; - new->Dst.Index = DstIndex; - new->Dst.WriteMask = DstWriteMask; - if (reader_data) { - new->Inst = reader_data->Writer; - new->ReaderCount = reader_data->ReaderCount; - new->Readers = reader_data->Readers; - } - return new; -} - -static void get_variable_helper( - struct rc_list ** variable_list, - struct rc_variable * variable) -{ - struct rc_list * list_ptr; - for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) { - if (readers_intersect(variable, list_ptr->Item)) { - rc_variable_add_friend(list_ptr->Item, variable); - return; - } - } - rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); -} - -static void get_variable_pair_helper( - struct rc_list ** variable_list, - struct radeon_compiler * c, - struct rc_instruction * inst, - struct rc_pair_sub_instruction * sub_inst) -{ - struct rc_reader_data reader_data; - struct rc_variable * new_var; - rc_register_file file; - unsigned int writemask; - - if (sub_inst->Opcode == RC_OPCODE_NOP) { - return; - } - memset(&reader_data, 0, sizeof(struct rc_reader_data)); - rc_get_readers_sub(c, inst, sub_inst, 
&reader_data, NULL, NULL, NULL); - - if (reader_data.ReaderCount == 0) { - return; - } - - if (sub_inst->WriteMask) { - file = RC_FILE_TEMPORARY; - writemask = sub_inst->WriteMask; - } else if (sub_inst->OutputWriteMask) { - file = RC_FILE_OUTPUT; - writemask = sub_inst->OutputWriteMask; - } else { - writemask = 0; - file = RC_FILE_NONE; - } - new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, - &reader_data); - get_variable_helper(variable_list, new_var); -} - -/** - * Generate a list of variables used by the shader program. Each instruction - * that writes to a register is considered a variable. The struct rc_variable - * data structure includes a list of readers and is essentially a - * definition-use chain. Any two variables that share a reader are considered - * "friends" and they are linked together via the Friend attribute. - */ -struct rc_list * rc_get_variables(struct radeon_compiler * c) -{ - struct rc_instruction * inst; - struct rc_list * variable_list = NULL; - - for (inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - struct rc_reader_data reader_data; - struct rc_variable * new_var; - memset(&reader_data, 0, sizeof(reader_data)); - - if (inst->Type == RC_INSTRUCTION_NORMAL) { - rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); - if (reader_data.ReaderCount == 0) { - continue; - } - new_var = rc_variable(c, inst->U.I.DstReg.File, - inst->U.I.DstReg.Index, - inst->U.I.DstReg.WriteMask, &reader_data); - get_variable_helper(&variable_list, new_var); - } else { - get_variable_pair_helper(&variable_list, c, inst, - &inst->U.P.RGB); - get_variable_pair_helper(&variable_list, c, inst, - &inst->U.P.Alpha); - } - } - - return variable_list; -} - -/** - * @return The bitwise or of the writemasks of a variable and all of its - * friends. 
- */ -unsigned int rc_variable_writemask_sum(struct rc_variable * var) -{ - unsigned int writemask = 0; - while(var) { - writemask |= var->Dst.WriteMask; - var = var->Friend; - } - return writemask; -} - -/* - * @return A list of readers for a variable and its friends. Readers - * that read from two different variable friends are only included once in - * this list. - */ -struct rc_list * rc_variable_readers_union(struct rc_variable * var) -{ - struct rc_list * list = NULL; - while (var) { - unsigned int i; - for (i = 0; i < var->ReaderCount; i++) { - struct rc_list * temp; - struct rc_reader * a = &var->Readers[i]; - unsigned int match = 0; - for (temp = list; temp; temp = temp->Next) { - struct rc_reader * b = temp->Item; - if (a->Inst->Type != b->Inst->Type) { - continue; - } - if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { - if (a->U.I.Src == b->U.I.Src) { - match = 1; - break; - } - } - if (a->Inst->Type == RC_INSTRUCTION_PAIR) { - if (a->U.P.Arg == b->U.P.Arg - && a->U.P.Src == b->U.P.Src) { - match = 1; - break; - } - } - } - if (match) { - continue; - } - rc_list_add(&list, rc_list(&var->C->Pool, a)); - } - var = var->Friend; - } - return list; -} - -static unsigned int reader_equals_src( - struct rc_reader reader, - unsigned int src_type, - void * src) -{ - if (reader.Inst->Type != src_type) { - return 0; - } - if (src_type == RC_INSTRUCTION_NORMAL) { - return reader.U.I.Src == src; - } else { - return reader.U.P.Src == src; - } -} - -static unsigned int variable_writes_src( - struct rc_variable * var, - unsigned int src_type, - void * src) -{ - unsigned int i; - for (i = 0; i < var->ReaderCount; i++) { - if (reader_equals_src(var->Readers[i], src_type, src)) { - return 1; - } - } - return 0; -} - - -struct rc_list * rc_variable_list_get_writers( - struct rc_list * var_list, - unsigned int src_type, - void * src) -{ - struct rc_list * list_ptr; - struct rc_list * writer_list = NULL; - for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { - struct 
rc_variable * var = list_ptr->Item; - if (variable_writes_src(var, src_type, src)) { - struct rc_variable * friend; - rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); - for (friend = var->Friend; friend; - friend = friend->Friend) { - if (variable_writes_src(friend, src_type, src)) { - rc_list_add(&writer_list, - rc_list(&var->C->Pool, friend)); - } - } - /* Once we have indentifed the variable and its - * friends that write this source, we can stop - * stop searching, because we know know of the - * other variables in the list will write this source. - * If they did they would be friends of var. - */ - break; - } - } - return writer_list; -} - -void rc_variable_print(struct rc_variable * var) -{ - unsigned int i; - while (var) { - fprintf(stderr, "%u: TEMP[%u].%u: ", - var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); - for (i = 0; i < 4; i++) { - fprintf(stderr, "chan %u: start=%u end=%u ", i, - var->Live[i].Start, var->Live[i].End); - } - fprintf(stderr, "%u readers\n", var->ReaderCount); - if (var->Friend) { - fprintf(stderr, "Friend: \n\t"); - } - var = var->Friend; - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h deleted file mode 100644 index 9427bee..0000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 2011 Tom Stellard - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef RADEON_VARIABLE_H -#define RADEON_VARIABLE_H - -#include "radeon_compiler.h" - -struct radeon_compiler; -struct rc_list; -struct rc_reader_data; -struct rc_readers; - -struct live_intervals { - int Start; - int End; - int Used; -}; - -struct rc_variable { - struct radeon_compiler * C; - struct rc_dst_register Dst; - - struct rc_instruction * Inst; - unsigned int ReaderCount; - struct rc_reader * Readers; - struct live_intervals Live[4]; - - /* A friend is a variable that shares a reader with another variable. 
- */ - struct rc_variable * Friend; -}; - -void rc_variable_change_dst( - struct rc_variable * var, - unsigned int new_index, - unsigned int new_writemask); - -void rc_variable_compute_live_intervals(struct rc_variable * var); - -void rc_variable_add_friend( - struct rc_variable * var, - struct rc_variable * friend); - -struct rc_variable * rc_variable( - struct radeon_compiler * c, - unsigned int DstFile, - unsigned int DstIndex, - unsigned int DstWriteMask, - struct rc_reader_data * reader_data); - -struct rc_list * rc_get_variables(struct radeon_compiler * c); - -unsigned int rc_variable_writemask_sum(struct rc_variable * var); - -struct rc_list * rc_variable_readers_union(struct rc_variable * var); - -struct rc_list * rc_variable_list_get_writers( - struct rc_list * var_list, - unsigned int src_type, - void * src); - -void rc_variable_print(struct rc_variable * var); - -#endif /* RADEON_VARIABLE_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/tests/Makefile b/src/mesa/drivers/dri/r300/compiler/tests/Makefile deleted file mode 100644 index e268543..0000000 --- a/src/mesa/drivers/dri/r300/compiler/tests/Makefile +++ /dev/null @@ -1,55 +0,0 @@ -# src/mesa/drivers/dri/r300/compiler/Makefile - -TOP = ../../../../../../.. -include $(TOP)/configs/current - -CFLAGS += -Wall -Werror - -### Basic defines ### -TESTS = radeon_compiler_util_tests - -TEST_SOURCES := $(TESTS:=.c) - -SHARED_SOURCES = \ - rc_test_helpers.c \ - unit_test.c - -C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES) - -INCLUDES = \ - -I. \ - -I.. 
- -COMPILER_LIB = ../libr300compiler.a - -##### TARGETS ##### - -default: depend run_tests - -depend: $(C_SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null - -# Remove .o and backup files -clean: - rm -f $(TESTS) depend depend.bak - -$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB) - $(APP_CC) -o $@ $^ - -run_tests: $(TESTS) - @echo "RUNNING TESTS:" - @echo "" - $(foreach test, $^, @./$(test)) - -.PHONY: $(COMPILER_LIB) -$(COMPILER_LIB): - $(MAKE) -C .. - -##### RULES ##### -.c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ - - -sinclude depend diff --git a/src/mesa/drivers/dri/r300/compiler/tests/radeon_compiler_util_tests.c b/src/mesa/drivers/dri/r300/compiler/tests/radeon_compiler_util_tests.c deleted file mode 100644 index a2e3f2a..0000000 --- a/src/mesa/drivers/dri/r300/compiler/tests/radeon_compiler_util_tests.c +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include -#include - -#include "radeon_compiler_util.h" -#include "radeon_program.h" - -#include "rc_test_helpers.h" -#include "unit_test.h" - -static void test_rc_inst_can_use_presub( - struct test_result * result, - int expected, - const char * add_str, - const char * replace_str) -{ - struct rc_instruction add_inst, replace_inst; - int ret; - - test_begin(result); - init_rc_normal_instruction(&add_inst, add_str); - init_rc_normal_instruction(&replace_inst, replace_str); - - ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0, - &replace_inst.U.I.SrcReg[0], - &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]); - - test_check(result, ret == expected); -} - -static void test_runner_rc_inst_can_use_presub(struct test_result * result) -{ - - /* This tests the case where the source being replace has the same - * register file and register index as another source register in the - * CMP instruction. 
A previous version of this function was ignoring - * all registers that shared the same file and index as the replacement - * register when counting the number of source selects. - * - * https://bugs.freedesktop.org/show_bug.cgi?id=36527 - */ - test_rc_inst_can_use_presub(result, 0, - "ADD temp[0].z, temp[6].__x_, const[1].__x_;", - "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;"); - - - /* Testing a random case that should fail - * - * https://bugs.freedesktop.org/show_bug.cgi?id=36527 - */ - test_rc_inst_can_use_presub(result, 0, - "ADD temp[3], temp[1], temp[2];", - "MAD temp[1], temp[0], const[0].xxxx, -temp[3];"); - - /* This tests the case where the arguments of the ADD - * instruction share the same register file and index. Normally, we - * would need only one source select for these two arguments, but since - * they will be part of a presubtract operation we need to use the two - * source selects that the presubtract instruction expects - * (src0 and src1). - * - * https://bugs.freedesktop.org/show_bug.cgi?id=36527 - */ - test_rc_inst_can_use_presub(result, 0, - "ADD temp[3].x, temp[0].x___, temp[0].x___;", - "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;"); -} - -int main(int argc, char ** argv) -{ - struct test tests[] = { - {"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub}, - {NULL, NULL} - }; - run_tests(tests); -} diff --git a/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.c b/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.c deleted file mode 100644 index ca4738a..0000000 --- a/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.c +++ /dev/null @@ -1,380 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "../radeon_compiler_util.h" -#include "../radeon_opcodes.h" -#include "../radeon_program.h" - -#include "rc_test_helpers.h" - -/* This file contains some helper functions for filling out the rc_instruction - * data structures. 
These functions take a string as input based on the format - * output by rc_program_print(). - */ - -#define VERBOSE 0 - -#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) - -#define REGEX_ERR_BUF_SIZE 50 - -struct match_info { - const char * String; - int Length; -}; - -static int match_length(regmatch_t * matches, int index) -{ - return matches[index].rm_eo - matches[index].rm_so; -} - -static int regex_helper( - const char * regex_str, - const char * search_str, - regmatch_t * matches, - int num_matches) -{ - char err_buf[REGEX_ERR_BUF_SIZE]; - regex_t regex; - int err_code; - unsigned int i; - - err_code = regcomp(&regex, regex_str, REG_EXTENDED); - if (err_code) { - regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE); - fprintf(stderr, "Failed to compile regex: %s\n", err_buf); - return 0; - } - - err_code = regexec(&regex, search_str, num_matches, matches, 0); - DBG("Search string: '%s'\n", search_str); - for (i = 0; i < num_matches; i++) { - DBG("Match %u start = %d end = %d\n", i, - matches[i].rm_so, matches[i].rm_eo); - } - if (err_code) { - regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE); - fprintf(stderr, "Failed to match regex: %s\n", err_buf); - return 0; - } - return 1; -} - -#define REGEX_SRC_MATCHES 6 - -struct src_tokens { - struct match_info Negate; - struct match_info Abs; - struct match_info File; - struct match_info Index; - struct match_info Swizzle; -}; - -/** - * Initialize the source register at index src_index for the instruction based - * on src_str. - * - * NOTE: Warning in init_rc_normal_instruction() applies to this function as - * well. - * - * @param src_str A string that represents the source register. The format for - * this string is the same that is output by rc_program_print.
- * @return 1 On success, 0 on failure - */ -int init_rc_normal_src( - struct rc_instruction * inst, - unsigned int src_index, - const char * src_str) -{ - const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]])\\](\\.*[[:lower:]-]*)"; - regmatch_t matches[REGEX_SRC_MATCHES]; - struct src_tokens tokens; - struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index]; - unsigned int i; - - /* Execute the regex */ - if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) { - fprintf(stderr, "Failed to execute regex for src register.\n"); - return 0; - } - - /* Create Tokens */ - tokens.Negate.String = src_str + matches[1].rm_so; - tokens.Negate.Length = match_length(matches, 1); - tokens.Abs.String = src_str + matches[2].rm_so; - tokens.Abs.Length = match_length(matches, 2); - tokens.File.String = src_str + matches[3].rm_so; - tokens.File.Length = match_length(matches, 3); - tokens.Index.String = src_str + matches[4].rm_so; - tokens.Index.Length = match_length(matches, 4); - tokens.Swizzle.String = src_str + matches[5].rm_so; - tokens.Swizzle.Length = match_length(matches, 5); - - /* Negate */ - if (tokens.Negate.Length > 0) { - src_reg->Negate = RC_MASK_XYZW; - } - - /* Abs */ - if (tokens.Abs.Length > 0) { - src_reg->Abs = 1; - } - - /* File */ - if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { - src_reg->File = RC_FILE_TEMPORARY; - } else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) { - src_reg->File = RC_FILE_INPUT; - } else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) { - src_reg->File = RC_FILE_CONSTANT; - } else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) { - src_reg->File = RC_FILE_NONE; - } - - /* Index */ - errno = 0; - src_reg->Index = strtol(tokens.Index.String, NULL, 10); - if (errno > 0) { - fprintf(stderr, "Could not convert src register index.\n"); - return 0; - } - - /* Swizzle */ - if (tokens.Swizzle.Length == 0) { - src_reg->Swizzle = RC_SWIZZLE_XYZW; - } 
else { - int str_index = 1; - src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED); - if (tokens.Swizzle.String[0] != '.') { - fprintf(stderr, "First char of swizzle is not valid.\n"); - return 0; - } - for (i = 0; i < 4; i++, str_index++) { - if (tokens.Swizzle.String[str_index] == '-') { - src_reg->Negate |= (1 << i); - str_index++; - } - switch(tokens.Swizzle.String[str_index]) { - case 'x': - SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X); - break; - case 'y': - SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y); - break; - case 'z': - SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z); - break; - case 'w': - SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W); - break; - case '1': - SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE); - break; - case '0': - SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO); - break; - case 'H': - SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF); - break; - case '_': - SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED); - break; - default: - fprintf(stderr, "Unknown src register swizzle.\n"); - return 0; - } - } - } - DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n", - src_reg->File, src_reg->Index, src_reg->Swizzle, - src_reg->Negate, src_reg->Abs); - return 1; -} - -#define REGEX_DST_MATCHES 4 - -struct dst_tokens { - struct match_info File; - struct match_info Index; - struct match_info WriteMask; -}; - -/** - * Initialize the destination for the instruction based on dst_str. - * - * NOTE: Warning in init_rc_normal_instruction() applies to this function as - * well. - * - * @param dst_str A string that represents the destination register. The format - * for this string is the same that is output by rc_program_print. 
- * @return 1 On success, 0 on failure - */ -int init_rc_normal_dst( - struct rc_instruction * inst, - const char * dst_str) -{ - const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]*)\\](\\.*[[:lower:]]*)"; - regmatch_t matches[REGEX_DST_MATCHES]; - struct dst_tokens tokens; - unsigned int i; - - /* Execute the regex */ - if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) { - fprintf(stderr, "Failed to execute regex for dst register.\n"); - return 0; - } - - /* Create Tokens */ - tokens.File.String = dst_str + matches[1].rm_so; - tokens.File.Length = match_length(matches, 1); - tokens.Index.String = dst_str + matches[2].rm_so; - tokens.Index.Length = match_length(matches, 2); - tokens.WriteMask.String = dst_str + matches[3].rm_so; - tokens.WriteMask.Length = match_length(matches, 3); - - /* File Type */ - if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) { - inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - } else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) { - inst->U.I.DstReg.File = RC_FILE_OUTPUT; - } else { - fprintf(stderr, "Unknown dst register file type.\n"); - return 0; - } - - /* File Index */ - errno = 0; - inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10); - - if (errno > 0) { - fprintf(stderr, "Could not convert dst register index\n"); - return 0; - } - - /* WriteMask */ - if (tokens.WriteMask.Length == 0) { - inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - } else { - /* The first character should be '.' 
*/ - if (tokens.WriteMask.String[0] != '.') { - fprintf(stderr, "1st char of writemask is not valid.\n"); - return 0; - } - for (i = 1; i < tokens.WriteMask.Length; i++) { - switch(tokens.WriteMask.String[i]) { - case 'x': - inst->U.I.DstReg.WriteMask |= RC_MASK_X; - break; - case 'y': - inst->U.I.DstReg.WriteMask |= RC_MASK_Y; - break; - case 'z': - inst->U.I.DstReg.WriteMask |= RC_MASK_Z; - break; - case 'w': - inst->U.I.DstReg.WriteMask |= RC_MASK_W; - break; - default: - fprintf(stderr, "Unknown swizzle in writemask.\n"); - return 0; - } - } - } - DBG("Dst Reg File=%u Index=%d Writemask=%d\n", - inst->U.I.DstReg.File, - inst->U.I.DstReg.Index, - inst->U.I.DstReg.WriteMask); - return 1; -} - -#define REGEX_INST_MATCHES 7 - -struct inst_tokens { - struct match_info Opcode; - struct match_info Sat; - struct match_info Dst; - struct match_info Srcs[3]; -}; - -/** - * Initialize a normal instruction based on inst_str. - * - * WARNING: This function might not be able to handle every kind of format that - * rc_program_print() can output. If you are having problems with a - * particular string, you may need to add support for it to this functions. - * - * @param inst_str A string that represents the source register. The format for - * this string is the same that is output by rc_program_print. 
- * @return 1 On success, 0 on failure - */ -int init_rc_normal_instruction( - struct rc_instruction * inst, - const char * inst_str) -{ - const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)"; - int i; - regmatch_t matches[REGEX_INST_MATCHES]; - struct inst_tokens tokens; - - /* Initialize inst */ - memset(inst, 0, sizeof(struct rc_instruction)); - inst->Type = RC_INSTRUCTION_NORMAL; - - /* Execute the regex */ - if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) { - return 0; - } - memset(&tokens, 0, sizeof(tokens)); - - /* Create Tokens */ - tokens.Opcode.String = inst_str + matches[1].rm_so; - tokens.Opcode.Length = match_length(matches, 1); - if (matches[2].rm_so > -1) { - tokens.Sat.String = inst_str + matches[2].rm_so; - tokens.Sat.Length = match_length(matches, 2); - } - - - /* Fill out the rest of the instruction. */ - for (i = 0; i < MAX_RC_OPCODE; i++) { - const struct rc_opcode_info * info = rc_get_opcode_info(i); - unsigned int first_src = 3; - unsigned int j; - if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) { - continue; - } - inst->U.I.Opcode = info->Opcode; - if (info->HasDstReg) { - char * dst_str; - tokens.Dst.String = inst_str + matches[3].rm_so; - tokens.Dst.Length = match_length(matches, 3); - first_src++; - - dst_str = malloc(sizeof(char) * (tokens.Dst.Length + 1)); - strncpy(dst_str, tokens.Dst.String, tokens.Dst.Length); - dst_str[tokens.Dst.Length] = '\0'; - init_rc_normal_dst(inst, dst_str); - free(dst_str); - } - for (j = 0; j < info->NumSrcRegs; j++) { - char * src_str; - tokens.Srcs[j].String = - inst_str + matches[first_src + j].rm_so; - tokens.Srcs[j].Length = - match_length(matches, first_src + j); - - src_str = malloc(sizeof(char) * - (tokens.Srcs[j].Length + 1)); - strncpy(src_str, tokens.Srcs[j].String, - tokens.Srcs[j].Length); - src_str[tokens.Srcs[j].Length] = '\0'; - init_rc_normal_src(inst, j, src_str); - } - break; - } - return 1; -} diff --git 
a/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.h b/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.h deleted file mode 100644 index 1a6bf96..0000000 --- a/src/mesa/drivers/dri/r300/compiler/tests/rc_test_helpers.h +++ /dev/null @@ -1,13 +0,0 @@ - -int init_rc_normal_src( - struct rc_instruction * inst, - unsigned int src_index, - const char * src_str); - -int init_rc_normal_dst( - struct rc_instruction * inst, - const char * dst_str); - -int init_rc_normal_instruction( - struct rc_instruction * inst, - const char * inst_str); diff --git a/src/mesa/drivers/dri/r300/compiler/tests/unit_test.c b/src/mesa/drivers/dri/r300/compiler/tests/unit_test.c deleted file mode 100644 index 266f336..0000000 --- a/src/mesa/drivers/dri/r300/compiler/tests/unit_test.c +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include -#include - -#include "unit_test.h" - -void run_tests(struct test tests[]) -{ - int i; - for (i = 0; tests[i].name; i++) { - printf("Test %s\n", tests[i].name); - memset(&tests[i].result, 0, sizeof(tests[i].result)); - tests[i].test_func(&tests[i].result); - printf("Test %s (%d/%d) pass\n", tests[i].name, - tests[i].result.pass, tests[i].result.test_count); - } -} - -void test_begin(struct test_result * result) -{ - result->test_count++; -} - -void test_check(struct test_result * result, int cond) -{ - printf("Subtest %u -> ", result->test_count); - if (cond) { - result->pass++; - printf("Pass"); - } else { - result->fail++; - printf("Fail"); - } - printf("\n"); -} diff --git a/src/mesa/drivers/dri/r300/compiler/tests/unit_test.h b/src/mesa/drivers/dri/r300/compiler/tests/unit_test.h deleted file mode 100644 index 441e8b6..0000000 --- a/src/mesa/drivers/dri/r300/compiler/tests/unit_test.h +++ /dev/null @@ -1,17 +0,0 @@ - -struct test_result { - unsigned int test_count; - unsigned int pass; - unsigned int fail; -}; - -struct test { - const char * name; - void (*test_func)(struct test_result * result); - struct test_result result; -}; - -void 
run_tests(struct test tests[]); - -void test_begin(struct test_result * result); -void test_check(struct test_result * result, int cond);