From: Gert Wollny Date: Sat, 5 Aug 2023 08:04:47 +0000 (+0200) Subject: r600: retire SB optimizer X-Git-Tag: upstream/23.3.3~3543 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d1b0629c4142c6bcc89d5abb3d19c02594d0932c;p=platform%2Fupstream%2Fmesa.git r600: retire SB optimizer The NIR backend is good enough and there is already a long list of reasons why SB should not be called because it doesn't handle certain instructions correctly. v2: - remove more references to SB (Vitaly Kuzmin) - remove unused sb context (Sam Ravnborg) v3: - drop unused variable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7166 Signed-off-by: Gert Wollny Part-of: --- diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 8869682..20daa0f 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -49,7 +49,6 @@ #include "evergreen_compute.h" #include "evergreen_compute_internal.h" #include "compute_memory_pool.h" -#include "sb/sb_public.h" #include /** diff --git a/src/gallium/drivers/r600/meson.build b/src/gallium/drivers/r600/meson.build index d14f1ac..98df693 100644 --- a/src/gallium/drivers/r600/meson.build +++ b/src/gallium/drivers/r600/meson.build @@ -72,39 +72,6 @@ files_r600 = files( 'radeon_vce.h', 'radeon_video.c', 'radeon_video.h', - 'sb/sb_bc_builder.cpp', - 'sb/sb_bc_decoder.cpp', - 'sb/sb_bc_dump.cpp', - 'sb/sb_bc_finalize.cpp', - 'sb/sb_bc.h', - 'sb/sb_bc_parser.cpp', - 'sb/sb_context.cpp', - 'sb/sb_core.cpp', - 'sb/sb_dce_cleanup.cpp', - 'sb/sb_def_use.cpp', - 'sb/sb_dump.cpp', - 'sb/sb_expr.cpp', - 'sb/sb_expr.h', - 'sb/sb_gcm.cpp', - 'sb/sb_gvn.cpp', - 'sb/sb_if_conversion.cpp', - 'sb/sb_ir.cpp', - 'sb/sb_ir.h', - 'sb/sb_liveness.cpp', - 'sb/sb_pass.cpp', - 'sb/sb_pass.h', - 'sb/sb_peephole.cpp', - 'sb/sb_psi_ops.cpp', - 'sb/sb_public.h', - 'sb/sb_ra_checker.cpp', - 'sb/sb_ra_coalesce.cpp', - 'sb/sb_ra_init.cpp', - 'sb/sb_sched.cpp', - 
'sb/sb_sched.h', - 'sb/sb_shader.cpp', - 'sb/sb_shader.h', - 'sb/sb_ssa_builder.cpp', - 'sb/sb_valtable.cpp', 'sfn/sfn_alu_defines.cpp', 'sfn/sfn_alu_defines.h', 'sfn/sfn_alu_readport_validation.cpp', diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 26d4022..ec64c4d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -33,8 +33,6 @@ #include "util/u_math.h" #include "pipe/p_shader_tokens.h" -#include "sb/sb_public.h" - #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 @@ -2817,8 +2815,6 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, uint32_t *bytecode; int i, j, r, fs_size; struct r600_fetch_shader *shader; - unsigned no_sb = rctx->screen->b.debug_flags & (DBG_NO_SB | DBG_NIR); - unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); assert(count < 32); @@ -2919,13 +2915,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, fprintf(stderr, "\n"); } - if (!sb_disasm) { - r600_bytecode_disasm(&bc); - - fprintf(stderr, "______________________________________________________________\n"); - } else { - r600_sb_bytecode_process(rctx, &bc, NULL, 1 /*dump*/, 0 /*optimize*/); - } + r600_bytecode_disasm(&bc); } fs_size = bc.ndw*4; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 82213a5..aff820a 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -26,8 +26,6 @@ #include "evergreen_compute.h" #include "r600d.h" -#include "sb/sb_public.h" - #include #include "pipe/p_shader_tokens.h" #include "util/u_debug.h" @@ -46,17 +44,7 @@ static const struct debug_named_value r600_debug_options[] = { /* features */ { "nocpdma", DBG_NO_CP_DMA, "Disable CP DMA" }, - /* shader backend */ - { "nosb", DBG_NO_SB, "Disable sb backend for graphics shaders" }, - { "sbdry", DBG_SB_DRY_RUN, "Don't use optimized bytecode (just print the dumps)" }, - { "sbstat", 
DBG_SB_STAT, "Print optimization statistics for shaders" }, - { "sbdump", DBG_SB_DUMP, "Print IR dumps after some optimization passes" }, - { "sbnofallback", DBG_SB_NO_FALLBACK, "Abort on errors instead of fallback" }, - { "sbdisasm", DBG_SB_DISASM, "Use sb disassembler for shader dumps" }, - { "sbsafemath", DBG_SB_SAFEMATH, "Disable unsafe math optimizations" }, - { "nirsb", DBG_NIR_SB, "Enable NIR with SB optimizer"}, - - DEBUG_NAMED_VALUE_END /* must be last */ + DEBUG_NAMED_VALUE_END /* must be last */ }; /* @@ -70,8 +58,6 @@ static void r600_destroy_context(struct pipe_context *context) r600_isa_destroy(rctx->isa); - r600_sb_context_destroy(rctx->sb_context); - for (sh = 0; sh < (rctx->b.gfx_level < EVERGREEN ? R600_NUM_HW_STAGES : EG_NUM_HW_STAGES); sh++) { r600_resource_reference(&rctx->scratch_buffers[sh].buffer, NULL); } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 544809e..57b1d2b 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -259,15 +259,6 @@ struct r600_gs_rings_state { /* This must start from 16. 
*/ /* features */ #define DBG_NO_CP_DMA (1 << 30) -/* shader backend */ -#define DBG_NO_SB (1 << 21) -#define DBG_SB_DRY_RUN (1 << 23) -#define DBG_SB_STAT (1 << 24) -#define DBG_SB_DUMP (1 << 25) -#define DBG_SB_NO_FALLBACK (1 << 26) -#define DBG_SB_DISASM (1 << 27) -#define DBG_SB_SAFEMATH (1 << 28) -#define DBG_NIR_SB (1 << 28) struct r600_screen { struct r600_common_screen b; @@ -595,7 +586,6 @@ struct r600_context { enum mesa_prim last_rast_prim; unsigned last_start_instance; - void *sb_context; struct r600_isa *isa; float sample_positions[4 * 16]; float tess_state[8]; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 50938c2..c77e032 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -32,8 +32,6 @@ #include "r600d.h" #include "sfn/sfn_nir.h" -#include "sb/sb_public.h" - #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -151,8 +149,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_shader_selector *sel = shader->selector; int r; - struct r600_screen *rscreen = (struct r600_screen *)ctx->screen; - const nir_shader_compiler_options *nir_options = (const nir_shader_compiler_options *) ctx->screen->get_compiler_options(ctx->screen, @@ -170,9 +166,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, pipe_shader_type_from_mesa(sel->nir->info.stage); bool dump = r600_can_dump_shader(&rctx->screen->b, processor); - bool use_sb = rctx->screen->b.debug_flags & DBG_NIR_SB; - unsigned sb_disasm; unsigned export_shader; shader->shader.bc.isa = rctx->isa; @@ -226,36 +220,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx, r600_dump_streamout(&sel->so); } } - - if (shader->shader.processor_type == PIPE_SHADER_VERTEX) { - /* only disable for vertex shaders in tess paths */ - if (key.vs.as_ls) - use_sb = 0; - } - use_sb &= 
(shader->shader.processor_type != PIPE_SHADER_TESS_CTRL); - use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_EVAL); - use_sb &= (shader->shader.processor_type != PIPE_SHADER_COMPUTE); - - /* disable SB for shaders using doubles */ - use_sb &= !shader->shader.uses_doubles; - - use_sb &= !shader->shader.uses_atomics; - use_sb &= !shader->shader.uses_images; - use_sb &= !shader->shader.uses_helper_invocation; - use_sb &= !shader->shader.disable_sb; - - /* SB can't handle READ_SCRATCH properly */ - use_sb &= !(shader->shader.needs_scratch_space && rscreen->b.gfx_level < R700); - - /* sb has bugs in array reg allocation - * (dEQP-GLES2.functional.shaders.struct.local.struct_array_dynamic_index_fragment - * with NTT) - */ - use_sb &= !(shader->shader.indirect_files & (1 << TGSI_FILE_TEMPORARY)); - use_sb &= !(shader->shader.indirect_files & (1 << TGSI_FILE_CONSTANT)); - - /* sb has scheduling assertion fails with interpolate_at. */ - use_sb &= !shader->shader.uses_interpolate_at_sample; /* Check if the bytecode has already been built. 
*/ if (!shader->shader.bc.bytecode) { @@ -266,22 +230,12 @@ int r600_pipe_shader_create(struct pipe_context *ctx, } } - sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); - if (dump && !sb_disasm) { + if (dump) { fprintf(stderr, "--------------------------------------------------------------\n"); r600_bytecode_disasm(&shader->shader.bc); fprintf(stderr, "______________________________________________________________\n"); - } else if ((dump && sb_disasm) || use_sb) { - r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, - dump, use_sb); - if (r) { - R600_ERR("r600_sb_bytecode_process failed !\n"); - goto error; - } - } - if (dump) { - print_shader_info(stderr, nshader++, &shader->shader); + print_shader_info(stderr, nshader++, &shader->shader); print_pipe_info(stderr, &sel->info); } diff --git a/src/gallium/drivers/r600/sb/notes.markdown b/src/gallium/drivers/r600/sb/notes.markdown deleted file mode 100644 index e48135c..0000000 --- a/src/gallium/drivers/r600/sb/notes.markdown +++ /dev/null @@ -1,415 +0,0 @@ -r600-sb -======= - -* * * * * - -Debugging ---------- - -### Environment variables - -- **R600\_DEBUG** - - There are new flags: - - - **nosb** - Disable sb backend for graphics shaders - - **sbdry** - Dry run, optimize but use source bytecode - - useful if you only want to check shader dumps - without the risk of lockups and other problems - - **sbstat** - Print optimization statistics (only time so far) - - **sbdump** - Print IR after some passes. - - **sbnofallback** - Abort on errors instead of fallback - - **sbdisasm** - Use sb disassembler for shader dumps - - **sbsafemath** - Disable unsafe math optimizations - -### Regression debugging - -If there are any regressions as compared to the default backend -(R600\_SB=0), it's possible to use the following environment variables -to find the incorrectly optimized shader that causes the regression. 
- -- **R600\_SB\_DSKIP\_MODE** - allows to skip optimization for some - shaders - - 0 - disabled (default) - - 1 - skip optimization for the shaders in the range - [R600\_SB\_DSKIP\_START; R600\_SB\_DSKIP\_END], that is, - optimize only the shaders that are not in this range - - 2 - optimize only the shaders in the range - [R600\_SB\_DSKIP\_START; R600\_SB\_DSKIP\_END] - -- **R600\_SB\_DSKIP\_START** - start of the range (1-based) - -- **R600\_SB\_DSKIP\_END** - end of the range (1-based) - -Example - optimize only the shaders 5, 6, and 7: - - R600_SB_DSKIP_START=5 R600_SB_DSKIP_END=7 R600_SB_DSKIP_MODE=2 - -All shaders compiled by the application are numbered starting from 1, -the number of shaders used by the application may be obtained by running -it with "R600_DEBUG=sb,sbstat" - it will print "sb: shader \#index\#" -for each compiled shader. - -After figuring out the total number of shaders used by the application, -the variables above allow to use bisection to find the shader that is -the cause of regression. E.g. if the application uses 100 shaders, we -can divide the range [1; 100] and run the application with the -optimization enabled only for the first half of the shaders: - - R600_SB_DSKIP_START=1 R600_SB_DSKIP_END=50 R600_SB_DSKIP_MODE=2 - -If the regression is reproduced with these parameters, then the failing -shader is in the range [1; 50], if it's not reproduced - then it's in -the range [51; 100]. Then we can divide the new range again and repeat -the testing, until we'll reduce the range to a single failing shader. - -*NOTE: This method relies on the assumption that the application -produces the same sequence of the shaders on each run. 
It's not always -true - some applications may produce different sequences of the shaders, -in such cases the tools like apitrace may be used to record the trace -with the application, then this method may be applied when replaying the -trace - also this may be faster and/or more convenient than testing the -application itself.* - -* * * * * - -Intermediate Representation ---------------------------- - -### Values - -All kinds of the operands (literal constants, references to kcache -constants, references to GPRs, etc) are currently represented by the -**value** class (possibly it makes sense to switch to hierarchy of -classes derived from **value** instead, to save some memory). - -All values (except some pseudo values like the exec\_mask or predicate -register) represent 32bit scalar values - there are no vector values, -CF/FETCH instructions use groups of 4 values for src and dst operands. - -### Nodes - -Shader programs are represented using the tree data structure, some -nodes contain a list of subnodes. - -#### Control flow nodes - -Control flow information is represented using four special node types -(based on the ideas from [[1]](#references) ) - -- **region\_node** - single-entry, single-exit region. - - All loops and if's in the program are enclosed in region nodes. - Region nodes have two containers for phi nodes - - region\_node::loop\_phi contains the phi expressions to be executed - at the region entry, region\_node::phi contains the phi expressions - to be executed at the region exit. It's the only type of the node - that contains associated phi expressions. - -- **depart\_node** - "depart region \$id after { ... }" - - Depart target region (jump to exit point) after executing contained - code. - -- **repeat\_node** - "repeat region \$id after { ... }" - - Repeat target region (jump to entry point) after executing contained - code. - -- **if\_node** - "if (cond) { ... }" - - Execute contained code if condition is true. 
The difference from - [[1]](#references) is that we don't have associated phi expressions - for the **if\_node**, we enclose **if\_node** in the - **region\_node** and store corresponding phi's in the - **region\_node**, this allows more uniform handling. - -The target region of depart and repeat nodes is always the region where -they are located (possibly in the nested region), there are no arbitrary -jumps/goto's - control flow in the program is always structured. - -Typical control flow constructs can be represented as in the following -examples: - -GLSL: - - if (cond) { - < 1 > - } else { - < 2 > - } - -IR: - - region #0 { - depart region #0 after { - if (cond) { - depart region #0 after { - < 1 > - } - } - < 2 > - } - - } - -GLSL: - - while (cond) { - < 1 > - } - -IR: - - region #0 { - - repeat region #0 after { - region #1 { - depart region #1 after { - if (!cond) { - depart region #0 - } - } - } - < 1 > - } - - } - -'Break' and 'continue' inside the loops are directly translated to the -depart and repeat nodes for the corresponding loop region. - -This may look a bit too complicated, but in fact this allows more simple -and uniform handling of the control flow. - -All loop\_phi and phi nodes for some region always have the same number -of source operands. The number of source operands for -region\_node::loop\_phi nodes is 1 + number of repeat nodes that -reference this region as a target. The number of source operands for -region\_node::phi nodes is equal to the number of depart nodes that -reference this region as a target. All depart/repeat nodes for the -region have unique indices equal to the index of source operand for -phi/loop\_phi nodes. - -First source operand for region\_node::loop\_phi nodes (src[0]) is an -incoming value that enters the region from the outside. Each remaining -source operand comes from the corresponding repeat node. 
- -More complex example: - -GLSL: - - a = 1; - while (a < 5) { - a = a * 2; - if (b == 3) { - continue; - } else { - a = 6; - } - if (c == 4) - break; - a = a + 1; - } - -IR with SSA form: - - a.1 = 1; - region #0 { - // loop phi values: src[0] - incoming, src[1] - from repeat_1, src[2] - from repeat_2 - region#0 loop_phi: a.2 = phi a.1, a.6, a.3 - - repeat_1 region #0 after { - a.3 = a.2 * 2; - cond1 = (b == 3); - region #1 { - depart_0 region #1 after { - if (cond1) { - repeat_2 region #0; - } - } - a.4 = 6; - - region #1 phi: a.5 = phi a.4; // src[0] - from depart_0 - } - cond2 = (c == 4); - region #2 { - depart_0 region #2 after { - if (cond2) { - depart_0 region #0; - } - } - } - a.6 = a.5 + 1; - } - - region #0 phi: a.7 = phi a.5 // src[0] from depart_0 - } - -Phi nodes with single source operand are just copies, they are not -really necessary, but this allows to handle all **depart\_node**s in the -uniform way. - -#### Instruction nodes - -Instruction nodes represent different kinds of instructions - -**alu\_node**, **cf\_node**, **fetch\_node**, etc. Each of them contains -the "bc" structure where all fields of the bytecode are stored (the type -is **bc\_alu** for **alu\_node**, etc). The operands are represented -using the vectors of pointers to **value** class (node::src, node::dst) - -#### SSA-specific nodes - -Phi nodes currently don't have special node class, they are stored as -**node**. Destination vector contains a single destination value, source -vector contains 1 or more source values. - -Psi nodes [[5], [6]](#references) also don't have a special node class -and stored as **node**. Source vector contains 3 values for each source -operand - the **value** of predicate, **value** of corresponding -PRED\_SEL field, and the source **value** itself. - -### Indirect addressing - -Special kind of values (VLK\_RELREG) is used to represent indirect -operands. These values don't have SSA versions. 
The representation is -mostly based on the [[2]](#references). Indirect operand contains the -"offset/address" value (value::rel), (e.g. some SSA version of the AR -register value, though after some passes it may be any value - constant, -register, etc), also it contains the maydef and mayuse vectors of -pointers to **value**s (similar to dst/src vectors in the **node**) to -represent the effects of aliasing in the SSA form. - -E.g. if we have the array R5.x ... R8.x and the following instruction : - - MOV R0.x, R[5 + AR].x - -then source indirect operand is represented with the VLK\_RELREG value, -value::rel is AR, value::maydef is empty (in fact it always contain the -same number of elements as mayuse to simplify the handling, but they are -NULLs), value::mayuse contains [R5.x, R6.x, R7.x, R8.x] (or the -corresponding SSA versions after ssa\_rename). - -Additional "virtual variables" as in [HSSA [2]](#references) are not -used, also there is no special handling for "zero versions". Typical -programs in our case are small, indirect addressing is rare, array sizes -are limited by max gpr number, so we don't really need to use special -tricks to avoid the explosion of value versions. Also this allows more -precise liveness computation for array elements without modifications to -the algorithms. - -With the following instruction: - - MOV R[5+AR].x, R0.x - -we'll have both maydef and mayuse vectors for dst operand filled with -array values initially: [R5.x, R6.x, R7.x, R8.x]. After the ssa\_rename -pass mayuse will contain previous versions, maydef will contain new -potentially-defined versions. - -* * * * * - -Passes ------- - -- **bc\_parser** - creates the IR from the source bytecode, - initializes src and dst value vectors for instruction nodes. Most - ALU nodes have one dst operand and the number of source operands is - equal to the number of source operands for the ISA instruction. 
- Nodes for PREDSETxx instructions have 3 dst operands - dst[0] is dst - gpr as in the original instruction, other two are pseudo-operands - that represent possibly updated predicate and exec\_mask. Predicate - values are used in the predicated alu instructions (node::pred), - exec\_mask values are used in the if\_nodes (if\_node::cond). Each - vector operand in the CF/TEX/VTX instructions is represented with 4 - values - components of the vector. - -- **ssa\_prepare** - creates phi expressions. - -- **ssa\_rename** - renames the values (assigns versions). - -- **liveness** - liveness computation, sets 'dead' flag for unused - nodes and values, optionally computes interference information for - the values. - -- **dce\_cleanup** - eliminates 'dead' nodes, also removes some - unnecessary nodes created by bc\_parser, e.g. the nodes for the JUMP - instructions in the source, containers for ALU groups (they were - only needed for the ssa\_rename pass) - -- **if\_conversion** - converts control flow with if\_nodes to the - data flow in cases where it can improve performance (small alu-only - branches). Both branches are executed speculatively and the phi - expressions are replaced with conditional moves (CNDxx) to select - the final value using the same condition predicate as was used by - the original if\_node. E.g. **if\_node** used dst[2] from PREDSETxx - instruction, CNDxx now uses dst[0] from the same PREDSETxx - instruction. - -- **peephole** - peephole optimizations - -- **gvn** - Global Value Numbering [[2]](#references), - [[3]](#references) - -- **gcm** - Global Code Motion [[3]](#references). Also performs - grouping of the instructions of the same kind (CF/FETCH/ALU). - -- register allocation passes, some ideas are used from - [[4]](#references), but implementation is simplified to make it more - efficient in terms of the compilation speed (e.g. no recursive - recoloring) while achieving good enough results. 
- - - **ra\_split** - prepares the program to register allocation. - Splits live ranges for constrained values by inserting the - copies to/from temporary values, so that the live range of the - constrained values becomes minimal. - - - **ra\_coalesce** - performs global allocation on registers used - in CF/FETCH instructions. It's performed first to make sure they - end up in the same GPR. Also tries to allocate all values - involved in copies (inserted by the ra\_split pass) to the same - register, so that the copies may be eliminated. - - - **ra\_init** - allocates gpr arrays (if indirect addressing is - used), and remaining values. - -- **post\_scheduler** - ALU scheduler, handles VLIW packing and - performs the final register allocation for local values inside ALU - clauses. Eliminates all coalesced copies (if src and dst of the copy - are allocated to the same register). - -- **ra\_checker** - optional debugging pass that tries to catch basic - errors of the scheduler or regalloc, - -- **bc\_finalize** - propagates the regalloc information from values - in node::src and node::dst vectors to the bytecode fields, converts - control flow structure (region/depart/repeat) to the target - instructions (JUMP/ELSE/POP, - LOOP\_START/LOOP\_END/LOOP\_CONTINUE/LOOP\_BREAK). - -- **bc\_builder** - builds final bytecode, - -* * * * * - -References ----------- - -[1] ["Tree-Based Code Optimization. A Thesis Proposal", Carl -McConnell](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.38.4210&rep=rep1&type=pdf) - -[2] ["Effective Representation of Aliases and Indirect Memory Operations -in SSA Form", Fred Chow, Sun Chan, Shin-Ming Liu, Raymond Lo, Mark -Streich](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.33.6974&rep=rep1&type=pdf) - -[3] ["Global Code Motion. 
Global Value Numbering.", Cliff -Click](http://www.cs.washington.edu/education/courses/cse501/06wi/reading/click-pldi95.pdf) - -[4] ["Register Allocation for Programs in SSA Form", Sebastian -Hack](http://digbib.ubka.uni-karlsruhe.de/volltexte/documents/6532) - -[5] ["An extension to the SSA representation for predicated code", -Francois de -Ferriere](http://www.cdl.uni-saarland.de/ssasem/talks/Francois.de.Ferriere.pdf) - -[6] ["Improvements to the Psi-SSA Representation", F. de -Ferriere](http://www.scopesconf.org/scopes-07/presentations/3_Presentation.pdf) diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h deleted file mode 100644 index ef2f398..0000000 --- a/src/gallium/drivers/r600/sb/sb_bc.h +++ /dev/null @@ -1,1037 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#ifndef SB_BC_H_ -#define SB_BC_H_ - -#include -#include "r600_isa.h" - -#include -#include -#include -#include - -struct r600_bytecode; -struct r600_shader; - -namespace r600_sb { - -class hw_encoding_format; -class node; -class alu_node; -class cf_node; -class fetch_node; -class alu_group_node; -class region_node; -class shader; -class value; - -class sb_ostream { -public: - sb_ostream() {} - - virtual void write(const char *s) = 0; - - sb_ostream& operator <<(const char *s) { - write(s); - return *this; - } - - sb_ostream& operator <<(const std::string& s) { - return *this << s.c_str(); - } - - sb_ostream& operator <<(void *p) { - char b[32]; - sprintf(b, "%p", p); - return *this << b; - } - - sb_ostream& operator <<(char c) { - char b[2]; - sprintf(b, "%c", c); - return *this << b; - } - - sb_ostream& operator <<(int n) { - char b[32]; - sprintf(b, "%d", n); - return *this << b; - } - - sb_ostream& operator <<(unsigned n) { - char b[32]; - sprintf(b, "%u", n); - return *this << b; - } - - sb_ostream& operator <<(double d) { - char b[32]; - snprintf(b, 32, "%g", d); - return *this << b; - } - - // print as field of specified width, right aligned - void print_w(int n, int width) { - char b[256],f[8]; - sprintf(f, "%%%dd", width); - snprintf(b, 256, f, n); - write(b); - } - - // print as field of specified width, left aligned - void print_wl(int n, int width) { - char b[256],f[8]; - sprintf(f, "%%-%dd", width); - snprintf(b, 256, f, n); - write(b); - } - - // print as field of specified width, left aligned - void print_wl(const std::string &s, int width) { - write(s.c_str()); - int l = s.length(); - while (l++ < width) { - write(" "); - } - } - - // print int as field of specified width, right aligned, zero-padded - void print_zw(int n, int width) { - char b[256],f[8]; - sprintf(f, "%%0%dd", width); - snprintf(b, 256, f, n); - write(b); - } - - // print int as field of specified width, right aligned, zero-padded, hex - 
void print_zw_hex(int n, int width) { - char b[256],f[8]; - sprintf(f, "%%0%dx", width); - snprintf(b, 256, f, n); - write(b); - } -}; - -class sb_ostringstream : public sb_ostream { - std::string data; -public: - sb_ostringstream() : data() {} - - virtual void write(const char *s) { - data += s; - } - - void clear() { data.clear(); } - - const char* c_str() { return data.c_str(); } - std::string& str() { return data; } -}; - -class sb_log : public sb_ostream { - FILE *o; -public: - sb_log() : o(stderr) {} - - virtual void write(const char *s) { - fputs(s, o); - } -}; - -extern sb_log sblog; - -enum shader_target -{ - TARGET_UNKNOWN, - TARGET_VS, - TARGET_ES, - TARGET_PS, - TARGET_GS, - TARGET_GS_COPY, - TARGET_COMPUTE, - TARGET_FETCH, - TARGET_HS, - TARGET_LS, - - TARGET_NUM -}; - -enum sb_hw_class_bits -{ - HB_R6 = (1<<0), - HB_R7 = (1<<1), - HB_EG = (1<<2), - HB_CM = (1<<3), - - HB_R6R7 = (HB_R6 | HB_R7), - HB_EGCM = (HB_EG | HB_CM), - HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG), - HB_R7EGCM = (HB_R7 | HB_EG | HB_CM), - - HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM) -}; - -enum sb_hw_chip -{ - HW_CHIP_UNKNOWN, - HW_CHIP_R600, - HW_CHIP_RV610, - HW_CHIP_RV630, - HW_CHIP_RV670, - HW_CHIP_RV620, - HW_CHIP_RV635, - HW_CHIP_RS780, - HW_CHIP_RS880, - HW_CHIP_RV770, - HW_CHIP_RV730, - HW_CHIP_RV710, - HW_CHIP_RV740, - HW_CHIP_CEDAR, - HW_CHIP_REDWOOD, - HW_CHIP_JUNIPER, - HW_CHIP_CYPRESS, - HW_CHIP_HEMLOCK, - HW_CHIP_PALM, - HW_CHIP_SUMO, - HW_CHIP_SUMO2, - HW_CHIP_BARTS, - HW_CHIP_TURKS, - HW_CHIP_CAICOS, - HW_CHIP_CAYMAN, - HW_CHIP_ARUBA -}; - -enum sb_hw_class -{ - HW_CLASS_UNKNOWN, - HW_CLASS_R600, - HW_CLASS_R700, - HW_CLASS_EVERGREEN, - HW_CLASS_CAYMAN -}; - -enum alu_slots { - SLOT_X = 0, - SLOT_Y = 1, - SLOT_Z = 2, - SLOT_W = 3, - SLOT_TRANS = 4 -}; - -enum misc_consts { - MAX_ALU_LITERALS = 4, - MAX_ALU_SLOTS = 128, - MAX_GPR = 128, - MAX_CHAN = 4 - -}; - -enum alu_src_sel { - - ALU_SRC_LDS_OQ_A = 219, - ALU_SRC_LDS_OQ_B = 220, - ALU_SRC_LDS_OQ_A_POP = 221, - 
ALU_SRC_LDS_OQ_B_POP = 222, - ALU_SRC_LDS_DIRECT_A = 223, - ALU_SRC_LDS_DIRECT_B = 224, - ALU_SRC_TIME_HI = 227, - ALU_SRC_TIME_LO = 228, - ALU_SRC_MASK_HI = 229, - ALU_SRC_MASK_LO = 230, - ALU_SRC_HW_WAVE_ID = 231, - ALU_SRC_SIMD_ID = 232, - ALU_SRC_SE_ID = 233, - ALU_SRC_HW_THREADGRP_ID = 234, - ALU_SRC_WAVE_ID_IN_GRP = 235, - ALU_SRC_NUM_THREADGRP_WAVES = 236, - ALU_SRC_HW_ALU_ODD = 237, - ALU_SRC_LOOP_IDX = 238, - ALU_SRC_PARAM_BASE_ADDR = 240, - ALU_SRC_NEW_PRIM_MASK = 241, - ALU_SRC_PRIM_MASK_HI = 242, - ALU_SRC_PRIM_MASK_LO = 243, - ALU_SRC_1_DBL_L = 244, - ALU_SRC_1_DBL_M = 245, - ALU_SRC_0_5_DBL_L = 246, - ALU_SRC_0_5_DBL_M = 247, - ALU_SRC_0 = 248, - ALU_SRC_1 = 249, - ALU_SRC_1_INT = 250, - ALU_SRC_M_1_INT = 251, - ALU_SRC_0_5 = 252, - ALU_SRC_LITERAL = 253, - ALU_SRC_PV = 254, - ALU_SRC_PS = 255, - - ALU_SRC_PARAM_OFFSET = 448 -}; - -enum alu_predicate_select -{ - PRED_SEL_OFF = 0, -// RESERVED = 1, - PRED_SEL_0 = 2, - PRED_SEL_1 = 3 -}; - - -enum alu_omod { - OMOD_OFF = 0, - OMOD_M2 = 1, - OMOD_M4 = 2, - OMOD_D2 = 3 -}; - -enum alu_index_mode { - INDEX_AR_X = 0, - INDEX_AR_Y_R600 = 1, - INDEX_AR_Z_R600 = 2, - INDEX_AR_W_R600 = 3, - - INDEX_LOOP = 4, - INDEX_GLOBAL = 5, - INDEX_GLOBAL_AR_X = 6 -}; - -enum alu_cayman_mova_dst { - CM_MOVADST_AR_X, - CM_MOVADST_PC, - CM_MOVADST_IDX0, - CM_MOVADST_IDX1, - CM_MOVADST_CG0, // clause-global byte 0 - CM_MOVADST_CG1, - CM_MOVADST_CG2, - CM_MOVADST_CG3 -}; - -enum alu_cayman_exec_mask_op { - CM_EMO_DEACTIVATE, - CM_EMO_BREAK, - CM_EMO_CONTINUE, - CM_EMO_KILL -}; - - -enum cf_exp_type { - EXP_PIXEL, - EXP_POS, - EXP_PARAM, - - EXP_TYPE_COUNT -}; - -enum cf_mem_type { - MEM_WRITE, - MEM_WRITE_IND, - MEM_WRITE_ACK, - MEM_WRITE_IND_ACK -}; - - -enum alu_kcache_mode { - KC_LOCK_NONE, - KC_LOCK_1, - KC_LOCK_2, - KC_LOCK_LOOP -}; - -enum alu_kcache_index_mode { - KC_INDEX_NONE, - KC_INDEX_0, - KC_INDEX_1, - KC_INDEX_INVALID -}; - -enum chan_select { - SEL_X = 0, - SEL_Y = 1, - SEL_Z = 2, - SEL_W = 3, - SEL_0 = 4, - 
SEL_1 = 5, -// RESERVED = 6, - SEL_MASK = 7 -}; - -enum bank_swizzle { - VEC_012 = 0, - VEC_021 = 1, - VEC_120 = 2, - VEC_102 = 3, - VEC_201 = 4, - VEC_210 = 5, - - VEC_NUM = 6, - - SCL_210 = 0, - SCL_122 = 1, - SCL_212 = 2, - SCL_221 = 3, - - SCL_NUM = 4 - -}; - -enum sched_queue_id { - SQ_CF, - SQ_ALU, - SQ_TEX, - SQ_VTX, - SQ_GDS, - - SQ_NUM -}; - -struct literal { - union { - int32_t i; - uint32_t u; - float f; - }; - - literal(int32_t i = 0) : i(i) {} - literal(uint32_t u) : u(u) {} - literal(float f) : f(f) {} - literal(double f) : f(f) {} - operator uint32_t() const { return u; } - bool operator ==(literal l) { return u == l.u; } - bool operator ==(int v_int) { return i == v_int; } - bool operator ==(unsigned v_uns) { return u == v_uns; } -}; - -struct bc_kcache { - unsigned mode; - unsigned bank; - unsigned addr; - unsigned index_mode; -} ; - -// TODO optimize bc structures - -struct bc_cf { - - bc_kcache kc[4]; - - unsigned id; - - - const cf_op_info * op_ptr; - unsigned op; - - unsigned addr:32; - - unsigned alt_const:1; - unsigned uses_waterfall:1; - - unsigned barrier:1; - unsigned count:7; - unsigned pop_count:3; - unsigned call_count:6; - unsigned whole_quad_mode:1; - unsigned valid_pixel_mode:1; - - unsigned jumptable_sel:3; - unsigned cf_const:5; - unsigned cond:2; - unsigned end_of_program:1; - - unsigned array_base:13; - unsigned elem_size:2; - unsigned index_gpr:7; - unsigned rw_gpr:7; - unsigned rw_rel:1; - unsigned type:2; - - unsigned burst_count:4; - unsigned mark:1; - unsigned sel[4]; - - unsigned array_size:12; - unsigned comp_mask:4; - - unsigned rat_id:4; - unsigned rat_inst:6; - unsigned rat_index_mode:2; - - void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); } - - bool is_alu_extended() { - assert(op_ptr->flags & CF_ALU); - return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE || - kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE || - kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode 
!= KC_INDEX_NONE; - } - -}; - -struct bc_alu_src { - unsigned sel:9; - unsigned chan:2; - unsigned neg:1; - unsigned abs:1; - unsigned rel:1; - literal value; - - void clear() { - sel = 0; - chan = 0; - neg = 0; - abs = 0; - rel = 0; - value = 0; - } -}; - -struct bc_alu { - const alu_op_info * op_ptr; - unsigned op; - - bc_alu_src src[3]; - - unsigned dst_gpr:7; - unsigned dst_chan:2; - unsigned dst_rel:1; - unsigned clamp:1; - unsigned omod:2; - unsigned bank_swizzle:3; - - unsigned index_mode:3; - unsigned last:1; - unsigned pred_sel:2; - - unsigned fog_merge:1; - unsigned write_mask:1; - unsigned update_exec_mask:1; - unsigned update_pred:1; - - unsigned slot:3; - - unsigned lds_idx_offset:6; - - alu_op_flags slot_flags; - - void set_op(unsigned op) { - this->op = op; - op_ptr = r600_isa_alu(op); - } - void clear() { - op_ptr = nullptr; - op = 0; - for (int i = 0; i < 3; ++i) - src[i].clear(); - dst_gpr = 0; - dst_chan = 0; - dst_rel = 0; - clamp = 0; - omod = 0; - bank_swizzle = 0; - index_mode = 0; - last = 0; - pred_sel = 0; - fog_merge = 0; - write_mask = 0; - update_exec_mask = 0; - update_pred = 0; - slot = 0; - lds_idx_offset = 0; - slot_flags = AF_NONE; - } - bc_alu() { - clear(); - } -}; - -struct bc_fetch { - const fetch_op_info * op_ptr; - unsigned op; - - unsigned bc_frac_mode:1; - unsigned fetch_whole_quad:1; - unsigned resource_id:8; - - unsigned src_gpr:7; - unsigned src_rel:1; - unsigned src_rel_global:1; /* for GDS ops */ - unsigned src_sel[4]; - - unsigned dst_gpr:7; - unsigned dst_rel:1; - unsigned dst_rel_global:1; /* for GDS ops */ - unsigned dst_sel[4]; - - unsigned alt_const:1; - - unsigned inst_mod:2; - unsigned resource_index_mode:2; - unsigned sampler_index_mode:2; - - unsigned coord_type[4]; - unsigned lod_bias:7; - - unsigned offset[3]; - - unsigned sampler_id:5; - - - unsigned fetch_type:2; - unsigned mega_fetch_count:6; - unsigned coalesced_read:1; - unsigned structured_read:2; - unsigned lds_req:1; - - unsigned data_format:6; - 
unsigned format_comp_all:1; - unsigned num_format_all:2; - unsigned semantic_id:8; - unsigned srf_mode_all:1; - unsigned use_const_fields:1; - - unsigned const_buf_no_stride:1; - unsigned endian_swap:2; - unsigned mega_fetch:1; - - unsigned src2_gpr:7; /* for GDS */ - unsigned alloc_consume:1; - unsigned uav_id:4; - unsigned uav_index_mode:2; - unsigned bcast_first_req:1; - - /* for MEM ops */ - unsigned elem_size:2; - unsigned uncached:1; - unsigned indexed:1; - unsigned burst_count:4; - unsigned array_base:13; - unsigned array_size:12; - - void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); } -}; - -struct shader_stats { - unsigned ndw; - unsigned ngpr; - unsigned nstack; - - unsigned cf; // clause instructions not included - unsigned alu; - unsigned alu_clauses; - unsigned fetch_clauses; - unsigned fetch; - unsigned alu_groups; - - unsigned shaders; // number of shaders (for accumulated stats) - - shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(), - fetch_clauses(), fetch(), alu_groups(), shaders() {} - - void collect(node *n); - void accumulate(shader_stats &s); - void dump(); - void dump_diff(shader_stats &s); -}; - -class sb_context { - -public: - - shader_stats src_stats, opt_stats; - - r600_isa *isa; - - sb_hw_chip hw_chip; - sb_hw_class hw_class; - - unsigned alu_temp_gprs; - unsigned max_fetch; - bool has_trans; - unsigned vtx_src_num; - unsigned num_slots; - bool uses_mova_gpr; - - bool r6xx_gpr_index_workaround; - - bool stack_workaround_8xx; - bool stack_workaround_9xx; - - unsigned wavefront_size; - unsigned stack_entry_size; - - static unsigned dump_pass; - static unsigned dump_stat; - - static unsigned dry_run; - static unsigned no_fallback; - static unsigned safe_math; - - static unsigned dskip_start; - static unsigned dskip_end; - static unsigned dskip_mode; - - sb_context() : src_stats(), opt_stats(), isa(0), - hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN), - alu_temp_gprs(0), max_fetch(0), 
has_trans(false), vtx_src_num(0), - num_slots(0), uses_mova_gpr(false), - r6xx_gpr_index_workaround(false), stack_workaround_8xx(false), - stack_workaround_9xx(false), wavefront_size(0), - stack_entry_size(0) {} - - int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass); - - bool is_r600() {return hw_class == HW_CLASS_R600;} - bool is_r700() {return hw_class == HW_CLASS_R700;} - bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;} - bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;} - bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;} - - bool needs_8xx_stack_workaround() { - if (!is_evergreen()) - return false; - - switch (hw_chip) { - case HW_CHIP_HEMLOCK: - case HW_CHIP_CYPRESS: - case HW_CHIP_JUNIPER: - return false; - default: - return true; - } - } - - bool needs_9xx_stack_workaround() { - return is_cayman(); - } - - sb_hw_class_bits hw_class_bit() { - switch (hw_class) { - case HW_CLASS_R600:return HB_R6; - case HW_CLASS_R700:return HB_R7; - case HW_CLASS_EVERGREEN:return HB_EG; - case HW_CLASS_CAYMAN:return HB_CM; - default: assert(!"unknown hw class"); return (sb_hw_class_bits)0; - - } - } - - unsigned cf_opcode(unsigned op) { - return r600_isa_cf_opcode(isa->hw_class, op); - } - - unsigned alu_opcode(unsigned op) { - return r600_isa_alu_opcode(isa->hw_class, op); - } - - unsigned alu_slots(unsigned op) { - return r600_isa_alu_slots(isa->hw_class, op); - } - - unsigned alu_slots(const alu_op_info * op_ptr) { - return op_ptr->slots[isa->hw_class]; - } - - unsigned alu_slots_mask(const alu_op_info * op_ptr) { - unsigned mask = 0; - unsigned slot_flags = alu_slots(op_ptr); - if (slot_flags & AF_V) - mask = 0x0F; - if (!is_cayman() && (slot_flags & AF_S)) - mask |= 0x10; - /* Force LDS_IDX ops into SLOT_X */ - if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11)) - mask = 0x01; - return mask; - } - - unsigned fetch_opcode(unsigned op) { - return r600_isa_fetch_opcode(isa->hw_class, op); - } - - bool is_kcache_sel(unsigned 
sel) { - return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320)); - } - - bool is_lds_oq(unsigned sel) { - return (sel >= 0xdb && sel <= 0xde); - } - - const char * get_hw_class_name(); - const char * get_hw_chip_name(); - -}; - -#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0) -#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0) - -class bc_decoder { - - sb_context &ctx; - - uint32_t* dw; - unsigned ndw; - -public: - - bc_decoder(sb_context &sctx, uint32_t *data, unsigned size) - : ctx(sctx), dw(data), ndw(size) {} - - int decode_cf(unsigned &i, bc_cf &bc); - int decode_alu(unsigned &i, bc_alu &bc); - int decode_fetch(unsigned &i, bc_fetch &bc); - -private: - int decode_cf_alu(unsigned &i, bc_cf &bc); - int decode_cf_exp(unsigned &i, bc_cf &bc); - int decode_cf_mem(unsigned &i, bc_cf &bc); - - int decode_fetch_vtx(unsigned &i, bc_fetch &bc); - int decode_fetch_gds(unsigned &i, bc_fetch &bc); - int decode_fetch_mem(unsigned &i, bc_fetch &bc); -}; - -// bytecode format definition - -class hw_encoding_format { - const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing - hw_encoding_format(); -protected: - uint32_t value; -public: - hw_encoding_format(sb_hw_class_bits hw) - : hw_target(hw), value(0) {} - hw_encoding_format(uint32_t v, sb_hw_class_bits hw) - : hw_target(hw), value(v) {} - uint32_t get_value(sb_hw_class_bits hw) const { - assert((hw & hw_target) == hw); - return value; - } -}; - -#define BC_FORMAT_BEGIN_HW(fmt, hwset) \ -class fmt##_##hwset : public hw_encoding_format {\ - typedef fmt##_##hwset thistype; \ -public: \ - fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \ - fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {}; - -#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL) - -#define BC_FORMAT_END(fmt) }; - -// bytecode format field definition - -#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \ - thistype & name(unsigned v) { \ - value |= 
((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \ - return *this; \ - } \ - unsigned get_##name() const { \ - return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \ - } - -#define BC_RSRVD(fmt, last_bit, first_bit) - -// CLAMP macro defined elsewhere interferes with bytecode field name -#undef CLAMP -#include "sb_bc_fmt_def.inc" - -#undef BC_FORMAT_BEGIN -#undef BC_FORMAT_END -#undef BC_FIELD -#undef BC_RSRVD - -class bc_parser { - sb_context & ctx; - - bc_decoder *dec; - - r600_bytecode *bc; - r600_shader *pshader; - - uint32_t *dw; - unsigned bc_ndw; - - unsigned max_cf; - - shader *sh; - - int error; - - alu_node *slots[2][5]; - unsigned cgroup; - - typedef std::vector id_cf_map; - id_cf_map cf_map; - - typedef std::stack region_stack; - region_stack loop_stack; - - bool gpr_reladdr; - - // Note: currently relies on input emitting SET_CF in same basic block as uses - value *cf_index_value[2]; - alu_node *mova; -public: - - bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : - ctx(sctx), dec(), bc(bc), pshader(pshader), - dw(), bc_ndw(), max_cf(), - sh(), error(), slots(), cgroup(), - cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { } - - int decode(); - int prepare(); - - shader* get_shader() { assert(!error); return sh; } - -private: - - int decode_shader(); - - int parse_decls(); - - int decode_cf(unsigned &i, bool &eop); - - int decode_alu_clause(cf_node *cf); - int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt); - - int decode_fetch_clause(cf_node *cf); - - int prepare_ir(); - int prepare_alu_clause(cf_node *cf); - int prepare_alu_group(cf_node* cf, alu_group_node *g); - int prepare_fetch_clause(cf_node *cf); - - int prepare_loop(cf_node *c); - int prepare_if(cf_node *c); - - void save_set_cf_index(value *val, unsigned idx); - value *get_cf_index_value(unsigned idx); - void save_mova(alu_node *mova); - alu_node *get_mova(); -}; - - - - -class bytecode { - typedef std::vector bc_vector; 
- sb_hw_class_bits hw_class_bit; - - bc_vector bc; - - unsigned pos; - -public: - - bytecode(sb_hw_class_bits hw, unsigned rdw = 256) - : hw_class_bit(hw), pos(0) { bc.reserve(rdw); } - - unsigned ndw() { return bc.size(); } - - void write_data(uint32_t* dst) { - std::copy(bc.begin(), bc.end(), dst); - } - - void align(unsigned a) { - unsigned size = bc.size(); - size = (size + a - 1) & ~(a-1); - bc.resize(size); - } - - void set_size(unsigned sz) { - assert(sz >= bc.size()); - bc.resize(sz); - } - - void seek(unsigned p) { - if (p != pos) { - if (p > bc.size()) { - bc.resize(p); - } - pos = p; - } - } - - unsigned get_pos() { return pos; } - uint32_t *data() { return &bc[0]; } - - bytecode & operator <<(uint32_t v) { - if (pos == ndw()) { - bc.push_back(v); - } else - bc.at(pos) = v; - ++pos; - return *this; - } - - bytecode & operator <<(const hw_encoding_format &e) { - *this << e.get_value(hw_class_bit); - return *this; - } - - bytecode & operator <<(const bytecode &b) { - bc.insert(bc.end(), b.bc.begin(), b.bc.end()); - return *this; - } - - uint32_t at(unsigned dw_id) { return bc.at(dw_id); } -}; - - -class bc_builder { - shader &sh; - sb_context &ctx; - bytecode bb; - int error; - -public: - - bc_builder(shader &s); - int build(); - bytecode& get_bytecode() { assert(!error); return bb; } - -private: - - int build_cf(cf_node *n); - - int build_cf_alu(cf_node *n); - int build_cf_mem(cf_node *n); - int build_cf_exp(cf_node *n); - - int build_alu_clause(cf_node *n); - int build_alu_group(alu_group_node *n); - int build_alu(alu_node *n); - - int build_fetch_clause(cf_node *n); - int build_fetch_tex(fetch_node *n); - int build_fetch_vtx(fetch_node *n); - int build_fetch_gds(fetch_node *n); - int build_fetch_mem(fetch_node* n); -}; - -} // namespace r600_sb - -#endif /* SB_BC_H_ */ diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp deleted file mode 100644 index f13f0d7..0000000 --- 
a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp +++ /dev/null @@ -1,746 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -bc_builder::bc_builder(shader &s) - : sh(s), ctx(s.get_ctx()), bb(ctx.hw_class_bit()), error(0) {} - -int bc_builder::build() { - - container_node *root = sh.root; - int cf_cnt = 0; - - // FIXME reserve total size to avoid reallocs - - for (node_iterator it = root->begin(), end = root->end(); - it != end; ++it) { - - cf_node *cf = static_cast(*it); - assert(cf->is_cf_inst() || cf->is_alu_clause() || cf->is_fetch_clause()); - - cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; - - cf->bc.id = cf_cnt++; - - if (flags & CF_ALU) { - if (cf->bc.is_alu_extended()) - cf_cnt++; - } - } - - bb.set_size(cf_cnt << 1); - bb.seek(cf_cnt << 1); - - unsigned cf_pos = 0; - - for (node_iterator I = root->begin(), end = root->end(); - I != end; ++I) { - - cf_node *cf = static_cast(*I); - cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; - - if (flags & CF_ALU) { - bb.seek(bb.ndw()); - cf->bc.addr = bb.ndw() >> 1; - build_alu_clause(cf); - cf->bc.count = (bb.ndw() >> 1) - cf->bc.addr - 1; - } else if (flags & CF_FETCH) { - bb.align(4); - bb.seek(bb.ndw()); - cf->bc.addr = bb.ndw() >> 1; - build_fetch_clause(cf); - cf->bc.count = (((bb.ndw() >> 1) - cf->bc.addr) >> 1) - 1; - } else if (cf->jump_target) { - cf->bc.addr = cf->jump_target->bc.id; - if (cf->jump_after_target) - cf->bc.addr += 1; - } - - bb.seek(cf_pos); - build_cf(cf); - cf_pos = bb.get_pos(); - } - - return 0; -} - -int bc_builder::build_alu_clause(cf_node* n) { - for (node_iterator I = n->begin(), E = n->end(); - I != E; ++I) { - - alu_group_node *g = static_cast(*I); - assert(g->is_valid()); - - build_alu_group(g); - } - return 0; -} - -int bc_builder::build_alu_group(alu_group_node* n) { - - for (node_iterator I = n->begin(), E = n->end(); - I != E; ++I) { - - alu_node *a = static_cast(*I); - assert(a->is_valid()); - build_alu(a); - } - - for(int i = 0, ls = n->literals.size(); 
i < ls; ++i) { - bb << n->literals.at(i).u; - } - - bb.align(2); - bb.seek(bb.ndw()); - - return 0; -} - -int bc_builder::build_fetch_clause(cf_node* n) { - for (node_iterator I = n->begin(), E = n->end(); - I != E; ++I) { - fetch_node *f = static_cast(*I); - - if (f->bc.op_ptr->flags & FF_GDS) - build_fetch_gds(f); - else if (f->bc.op_ptr->flags & FF_MEM) - build_fetch_mem(f); - else if (f->bc.op_ptr->flags & FF_VTX) - build_fetch_vtx(f); - else - build_fetch_tex(f); - } - return 0; -} - - -int bc_builder::build_cf(cf_node* n) { - const bc_cf &bc = n->bc; - const cf_op_info *cfop = bc.op_ptr; - - if (cfop->flags & CF_ALU) - return build_cf_alu(n); - if (cfop->flags & (CF_EXP | CF_MEM)) - return build_cf_exp(n); - - if (ctx.is_egcm()) { - bb << CF_WORD0_EGCM() - .ADDR(bc.addr) - .JUMPTABLE_SEL(bc.jumptable_sel); - - if (ctx.is_evergreen()) - - bb << CF_WORD1_EG() - .BARRIER(bc.barrier) - .CF_CONST(bc.cf_const) - .CF_INST(ctx.cf_opcode(bc.op)) - .COND(bc.cond) - .COUNT(bc.count) - .END_OF_PROGRAM(bc.end_of_program) - .POP_COUNT(bc.pop_count) - .VALID_PIXEL_MODE(bc.valid_pixel_mode) - .WHOLE_QUAD_MODE(bc.whole_quad_mode); - - else //cayman - - bb << CF_WORD1_CM() - .BARRIER(bc.barrier) - .CF_CONST(bc.cf_const) - .CF_INST(ctx.cf_opcode(bc.op)) - .COND(bc.cond) - .COUNT(bc.count) - .POP_COUNT(bc.pop_count) - .VALID_PIXEL_MODE(bc.valid_pixel_mode); - } else { - bb << CF_WORD0_R6R7() - .ADDR(bc.addr); - - assert(bc.count < ctx.max_fetch); - - bb << CF_WORD1_R6R7() - .BARRIER(bc.barrier) - .CALL_COUNT(bc.call_count) - .CF_CONST(bc.cf_const) - .CF_INST(ctx.cf_opcode(bc.op)) - .COND(bc.cond) - .COUNT(bc.count & 7) - .COUNT_3(bc.count >> 3) - .END_OF_PROGRAM(bc.end_of_program) - .POP_COUNT(bc.pop_count) - .VALID_PIXEL_MODE(bc.valid_pixel_mode) - .WHOLE_QUAD_MODE(bc.whole_quad_mode); - } - - return 0; -} - -int bc_builder::build_cf_alu(cf_node* n) { - const bc_cf &bc = n->bc; - - assert(bc.count < 128); - - if (n->bc.is_alu_extended()) { - assert(ctx.is_egcm()); - - bb << 
CF_ALU_WORD0_EXT_EGCM() - .KCACHE_BANK2(bc.kc[2].bank) - .KCACHE_BANK3(bc.kc[3].bank) - .KCACHE_BANK_INDEX_MODE0(bc.kc[0].index_mode) - .KCACHE_BANK_INDEX_MODE1(bc.kc[1].index_mode) - .KCACHE_BANK_INDEX_MODE2(bc.kc[2].index_mode) - .KCACHE_BANK_INDEX_MODE3(bc.kc[3].index_mode) - .KCACHE_MODE2(bc.kc[2].mode); - - bb << CF_ALU_WORD1_EXT_EGCM() - .BARRIER(bc.barrier) - .CF_INST(ctx.cf_opcode(CF_OP_ALU_EXT)) - .KCACHE_ADDR2(bc.kc[2].addr) - .KCACHE_ADDR3(bc.kc[3].addr) - .KCACHE_MODE3(bc.kc[3].mode); - } - - bb << CF_ALU_WORD0_ALL() - .ADDR(bc.addr) - .KCACHE_BANK0(bc.kc[0].bank) - .KCACHE_BANK1(bc.kc[1].bank) - .KCACHE_MODE0(bc.kc[0].mode); - - assert(bc.count < 128); - - if (ctx.is_r600()) - bb << CF_ALU_WORD1_R6() - .BARRIER(bc.barrier) - .CF_INST(ctx.cf_opcode(bc.op)) - .COUNT(bc.count) - .KCACHE_ADDR0(bc.kc[0].addr) - .KCACHE_ADDR1(bc.kc[1].addr) - .KCACHE_MODE1(bc.kc[1].mode) - .USES_WATERFALL(bc.uses_waterfall) - .WHOLE_QUAD_MODE(bc.whole_quad_mode); - else - bb << CF_ALU_WORD1_R7EGCM() - .ALT_CONST(bc.alt_const) - .BARRIER(bc.barrier) - .CF_INST(ctx.cf_opcode(bc.op)) - .COUNT(bc.count) - .KCACHE_ADDR0(bc.kc[0].addr) - .KCACHE_ADDR1(bc.kc[1].addr) - .KCACHE_MODE1(bc.kc[1].mode) - .WHOLE_QUAD_MODE(bc.whole_quad_mode); - - return 0; -} - -int bc_builder::build_cf_exp(cf_node* n) { - const bc_cf &bc = n->bc; - const cf_op_info *cfop = bc.op_ptr; - - if (cfop->flags & CF_RAT) { - assert(ctx.is_egcm()); - - bb << CF_ALLOC_EXPORT_WORD0_RAT_EGCM() - .ELEM_SIZE(bc.elem_size) - .INDEX_GPR(bc.index_gpr) - .RAT_ID(bc.rat_id) - .RAT_INDEX_MODE(bc.rat_index_mode) - .RAT_INST(bc.rat_inst) - .RW_GPR(bc.rw_gpr) - .RW_REL(bc.rw_rel) - .TYPE(bc.type); - } else { - - bb << CF_ALLOC_EXPORT_WORD0_ALL() - .ARRAY_BASE(bc.array_base) - .ELEM_SIZE(bc.elem_size) - .INDEX_GPR(bc.index_gpr) - .RW_GPR(bc.rw_gpr) - .RW_REL(bc.rw_rel) - .TYPE(bc.type); - } - - if (cfop->flags & CF_EXP) { - - if (!ctx.is_egcm()) - bb << CF_ALLOC_EXPORT_WORD1_SWIZ_R6R7() - .BARRIER(bc.barrier) - 
.BURST_COUNT(bc.burst_count) - .CF_INST(ctx.cf_opcode(bc.op)) - .END_OF_PROGRAM(bc.end_of_program) - .SEL_X(bc.sel[0]) - .SEL_Y(bc.sel[1]) - .SEL_Z(bc.sel[2]) - .SEL_W(bc.sel[3]) - .VALID_PIXEL_MODE(bc.valid_pixel_mode) - .WHOLE_QUAD_MODE(bc.whole_quad_mode); - - else if (ctx.is_evergreen()) - bb << CF_ALLOC_EXPORT_WORD1_SWIZ_EG() - .BARRIER(bc.barrier) - .BURST_COUNT(bc.burst_count) - .CF_INST(ctx.cf_opcode(bc.op)) - .END_OF_PROGRAM(bc.end_of_program) - .MARK(bc.mark) - .SEL_X(bc.sel[0]) - .SEL_Y(bc.sel[1]) - .SEL_Z(bc.sel[2]) - .SEL_W(bc.sel[3]) - .VALID_PIXEL_MODE(bc.valid_pixel_mode); - - else // cayman - bb << CF_ALLOC_EXPORT_WORD1_SWIZ_CM() - .BARRIER(bc.barrier) - .BURST_COUNT(bc.burst_count) - .CF_INST(ctx.cf_opcode(bc.op)) - .MARK(bc.mark) - .SEL_X(bc.sel[0]) - .SEL_Y(bc.sel[1]) - .SEL_Z(bc.sel[2]) - .SEL_W(bc.sel[3]) - .VALID_PIXEL_MODE(bc.valid_pixel_mode); - - } else if (cfop->flags & CF_MEM) { - return build_cf_mem(n); - } - - return 0; -} - -int bc_builder::build_cf_mem(cf_node* n) { - const bc_cf &bc = n->bc; - - if (!ctx.is_egcm()) - bb << CF_ALLOC_EXPORT_WORD1_BUF_R6R7() - .ARR_SIZE(bc.array_size) - .BARRIER(bc.barrier) - .BURST_COUNT(bc.burst_count) - .CF_INST(ctx.cf_opcode(bc.op)) - .COMP_MASK(bc.comp_mask) - .END_OF_PROGRAM(bc.end_of_program) - .VALID_PIXEL_MODE(bc.valid_pixel_mode) - .WHOLE_QUAD_MODE(bc.whole_quad_mode); - - else if (ctx.is_evergreen()) - bb << CF_ALLOC_EXPORT_WORD1_BUF_EG() - .ARR_SIZE(bc.array_size) - .BARRIER(bc.barrier) - .BURST_COUNT(bc.burst_count) - .CF_INST(ctx.cf_opcode(bc.op)) - .COMP_MASK(bc.comp_mask) - .END_OF_PROGRAM(bc.end_of_program) - .MARK(bc.mark) - .VALID_PIXEL_MODE(bc.valid_pixel_mode); - - else // cayman - bb << CF_ALLOC_EXPORT_WORD1_BUF_CM() - .ARR_SIZE(bc.array_size) - .BARRIER(bc.barrier) - .BURST_COUNT(bc.burst_count) - .CF_INST(ctx.cf_opcode(bc.op)) - .COMP_MASK(bc.comp_mask) - .MARK(bc.mark) - .VALID_PIXEL_MODE(bc.valid_pixel_mode); - - return 0; -} - -int bc_builder::build_alu(alu_node* n) { - const 
bc_alu &bc = n->bc; - const alu_op_info *aop = bc.op_ptr; - - if (n->bc.op_ptr->flags & AF_LDS) { - assert(ctx.is_egcm()); - bb << ALU_WORD0_LDS_IDX_OP_EGCM() - .SRC0_SEL(bc.src[0].sel) - .SRC0_REL(bc.src[0].rel) - .SRC0_CHAN(bc.src[0].chan) - .IDX_OFFSET_4((bc.lds_idx_offset >> 4) & 1) - .SRC1_SEL(bc.src[1].sel) - .SRC1_REL(bc.src[1].rel) - .SRC1_CHAN(bc.src[1].chan) - .IDX_OFFSET_5((bc.lds_idx_offset >> 5) & 1) - .INDEX_MODE(bc.index_mode) - .PRED_SEL(bc.pred_sel) - .LAST(bc.last); - - bb << ALU_WORD1_LDS_IDX_OP_EGCM() - .SRC2_SEL(bc.src[2].sel) - .SRC2_REL(bc.src[2].rel) - .SRC2_CHAN(bc.src[2].chan) - .IDX_OFFSET_1((bc.lds_idx_offset >> 1) & 1) - .ALU_INST(ctx.alu_opcode(ALU_OP3_LDS_IDX_OP)) - .BANK_SWIZZLE(bc.bank_swizzle) - .LDS_OP((bc.op_ptr->opcode[1] >> 8) & 0xff) - .IDX_OFFSET_0((bc.lds_idx_offset >> 0) & 1) - .IDX_OFFSET_2((bc.lds_idx_offset >> 2) & 1) - .DST_CHAN(bc.dst_chan) - .IDX_OFFSET_3((bc.lds_idx_offset >> 3) & 1); - - return 0; - } - - bb << ALU_WORD0_ALL() - .INDEX_MODE(bc.index_mode) - .LAST(bc.last) - .PRED_SEL(bc.pred_sel) - .SRC0_SEL(bc.src[0].sel) - .SRC0_CHAN(bc.src[0].chan) - .SRC0_NEG(bc.src[0].neg) - .SRC0_REL(bc.src[0].rel) - .SRC1_SEL(bc.src[1].sel) - .SRC1_CHAN(bc.src[1].chan) - .SRC1_NEG(bc.src[1].neg) - .SRC1_REL(bc.src[1].rel); - - if (aop->src_count<3) { - if (ctx.is_r600()) - bb << ALU_WORD1_OP2_R6() - .ALU_INST(ctx.alu_opcode(bc.op)) - .BANK_SWIZZLE(bc.bank_swizzle) - .CLAMP(bc.clamp) - .DST_GPR(bc.dst_gpr) - .DST_CHAN(bc.dst_chan) - .DST_REL(bc.dst_rel) - .FOG_MERGE(bc.fog_merge) - .OMOD(bc.omod) - .SRC0_ABS(bc.src[0].abs) - .SRC1_ABS(bc.src[1].abs) - .UPDATE_EXEC_MASK(bc.update_exec_mask) - .UPDATE_PRED(bc.update_pred) - .WRITE_MASK(bc.write_mask); - else { - - if (ctx.is_cayman() && (aop->flags & AF_MOVA)) { - - bb << ALU_WORD1_OP2_MOVA_CM() - .ALU_INST(ctx.alu_opcode(bc.op)) - .BANK_SWIZZLE(bc.bank_swizzle) - .CLAMP(bc.clamp) - .MOVA_DST(bc.dst_gpr) - .DST_CHAN(bc.dst_chan) - .DST_REL(bc.dst_rel) - .OMOD(bc.omod) - 
.UPDATE_EXEC_MASK(bc.update_exec_mask) - .UPDATE_PRED(bc.update_pred) - .WRITE_MASK(bc.write_mask) - .SRC0_ABS(bc.src[0].abs) - .SRC1_ABS(bc.src[1].abs); - - } else if (ctx.is_cayman() && (aop->flags & (AF_PRED|AF_KILL))) { - bb << ALU_WORD1_OP2_EXEC_MASK_CM() - .ALU_INST(ctx.alu_opcode(bc.op)) - .BANK_SWIZZLE(bc.bank_swizzle) - .CLAMP(bc.clamp) - .DST_CHAN(bc.dst_chan) - .DST_REL(bc.dst_rel) - .EXECUTE_MASK_OP(bc.omod) - .UPDATE_EXEC_MASK(bc.update_exec_mask) - .UPDATE_PRED(bc.update_pred) - .WRITE_MASK(bc.write_mask) - .SRC0_ABS(bc.src[0].abs) - .SRC1_ABS(bc.src[1].abs); - - } else - bb << ALU_WORD1_OP2_R7EGCM() - .ALU_INST(ctx.alu_opcode(bc.op)) - .BANK_SWIZZLE(bc.bank_swizzle) - .CLAMP(bc.clamp) - .DST_GPR(bc.dst_gpr) - .DST_CHAN(bc.dst_chan) - .DST_REL(bc.dst_rel) - .OMOD(bc.omod) - .UPDATE_EXEC_MASK(bc.update_exec_mask) - .UPDATE_PRED(bc.update_pred) - .WRITE_MASK(bc.write_mask) - .SRC0_ABS(bc.src[0].abs) - .SRC1_ABS(bc.src[1].abs); - - } - } else - bb << ALU_WORD1_OP3_ALL() - .ALU_INST(ctx.alu_opcode(bc.op)) - .BANK_SWIZZLE(bc.bank_swizzle) - .CLAMP(bc.clamp) - .DST_GPR(bc.dst_gpr) - .DST_CHAN(bc.dst_chan) - .DST_REL(bc.dst_rel) - .SRC2_SEL(bc.src[2].sel) - .SRC2_CHAN(bc.src[2].chan) - .SRC2_NEG(bc.src[2].neg) - .SRC2_REL(bc.src[2].rel); - return 0; -} - -int bc_builder::build_fetch_tex(fetch_node* n) { - const bc_fetch &bc = n->bc; - ASSERTED const fetch_op_info *fop = bc.op_ptr; - - assert(!(fop->flags & FF_VTX)); - - if (ctx.is_r600()) - bb << TEX_WORD0_R6() - .BC_FRAC_MODE(bc.bc_frac_mode) - .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) - .RESOURCE_ID(bc.resource_id) - .SRC_GPR(bc.src_gpr) - .SRC_REL(bc.src_rel) - .TEX_INST(ctx.fetch_opcode(bc.op)); - - else if (ctx.is_r700()) - bb << TEX_WORD0_R7() - .ALT_CONST(bc.alt_const) - .BC_FRAC_MODE(bc.bc_frac_mode) - .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) - .RESOURCE_ID(bc.resource_id) - .SRC_GPR(bc.src_gpr) - .SRC_REL(bc.src_rel) - .TEX_INST(ctx.fetch_opcode(bc.op)); - - else - bb << TEX_WORD0_EGCM() - 
.ALT_CONST(bc.alt_const) - .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) - .INST_MOD(bc.inst_mod) - .RESOURCE_ID(bc.resource_id) - .RESOURCE_INDEX_MODE(bc.resource_index_mode) - .SAMPLER_INDEX_MODE(bc.sampler_index_mode) - .SRC_GPR(bc.src_gpr) - .SRC_REL(bc.src_rel) - .TEX_INST(ctx.fetch_opcode(bc.op)); - - bb << TEX_WORD1_ALL() - .COORD_TYPE_X(bc.coord_type[0]) - .COORD_TYPE_Y(bc.coord_type[1]) - .COORD_TYPE_Z(bc.coord_type[2]) - .COORD_TYPE_W(bc.coord_type[3]) - .DST_GPR(bc.dst_gpr) - .DST_REL(bc.dst_rel) - .DST_SEL_X(bc.dst_sel[0]) - .DST_SEL_Y(bc.dst_sel[1]) - .DST_SEL_Z(bc.dst_sel[2]) - .DST_SEL_W(bc.dst_sel[3]) - .LOD_BIAS(bc.lod_bias); - - bb << TEX_WORD2_ALL() - .OFFSET_X(bc.offset[0]) - .OFFSET_Y(bc.offset[1]) - .OFFSET_Z(bc.offset[2]) - .SAMPLER_ID(bc.sampler_id) - .SRC_SEL_X(bc.src_sel[0]) - .SRC_SEL_Y(bc.src_sel[1]) - .SRC_SEL_Z(bc.src_sel[2]) - .SRC_SEL_W(bc.src_sel[3]); - - bb << 0; - return 0; -} - -int bc_builder::build_fetch_gds(fetch_node *n) { - const bc_fetch &bc = n->bc; - ASSERTED const fetch_op_info *fop = bc.op_ptr; - unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f; - unsigned mem_op = 4; - assert(fop->flags & FF_GDS); - - if (bc.op == FETCH_OP_TF_WRITE) { - mem_op = 5; - gds_op = 0; - } - - bb << MEM_GDS_WORD0_EGCM() - .MEM_INST(2) - .MEM_OP(mem_op) - .SRC_GPR(bc.src_gpr) - .SRC_SEL_X(bc.src_sel[0]) - .SRC_SEL_Y(bc.src_sel[1]) - .SRC_SEL_Z(bc.src_sel[2]); - - bb << MEM_GDS_WORD1_EGCM() - .DST_GPR(bc.dst_gpr) - .DST_REL_MODE(bc.dst_rel) - .GDS_OP(gds_op) - .SRC_GPR(bc.src2_gpr) - .UAV_INDEX_MODE(bc.uav_index_mode) - .UAV_ID(bc.uav_id) - .ALLOC_CONSUME(bc.alloc_consume) - .BCAST_FIRST_REQ(bc.bcast_first_req); - - bb << MEM_GDS_WORD2_EGCM() - .DST_SEL_X(bc.dst_sel[0]) - .DST_SEL_Y(bc.dst_sel[1]) - .DST_SEL_Z(bc.dst_sel[2]) - .DST_SEL_W(bc.dst_sel[3]); - - bb << 0; - return 0; -} - -int bc_builder::build_fetch_vtx(fetch_node* n) { - const bc_fetch &bc = n->bc; - ASSERTED const fetch_op_info *fop = bc.op_ptr; - - assert(fop->flags & FF_VTX); - 
- if (!ctx.is_cayman()) - bb << VTX_WORD0_R6R7EG() - .BUFFER_ID(bc.resource_id) - .FETCH_TYPE(bc.fetch_type) - .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) - .MEGA_FETCH_COUNT(bc.mega_fetch_count) - .SRC_GPR(bc.src_gpr) - .SRC_REL(bc.src_rel) - .SRC_SEL_X(bc.src_sel[0]) - .VC_INST(ctx.fetch_opcode(bc.op)); - - else - bb << VTX_WORD0_CM() - .BUFFER_ID(bc.resource_id) - .COALESCED_READ(bc.coalesced_read) - .FETCH_TYPE(bc.fetch_type) - .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) - .LDS_REQ(bc.lds_req) - .SRC_GPR(bc.src_gpr) - .SRC_REL(bc.src_rel) - .SRC_SEL_X(bc.src_sel[0]) - .SRC_SEL_Y(bc.src_sel[1]) - .STRUCTURED_READ(bc.structured_read) - .VC_INST(ctx.fetch_opcode(bc.op)); - - if (bc.op == FETCH_OP_SEMFETCH) - bb << VTX_WORD1_SEM_ALL() - .DATA_FORMAT(bc.data_format) - .DST_SEL_X(bc.dst_sel[0]) - .DST_SEL_Y(bc.dst_sel[1]) - .DST_SEL_Z(bc.dst_sel[2]) - .DST_SEL_W(bc.dst_sel[3]) - .FORMAT_COMP_ALL(bc.format_comp_all) - .NUM_FORMAT_ALL(bc.num_format_all) - .SEMANTIC_ID(bc.semantic_id) - .SRF_MODE_ALL(bc.srf_mode_all) - .USE_CONST_FIELDS(bc.use_const_fields); - else - bb << VTX_WORD1_GPR_ALL() - .DATA_FORMAT(bc.data_format) - .DST_GPR(bc.dst_gpr) - .DST_REL(bc.dst_rel) - .DST_SEL_X(bc.dst_sel[0]) - .DST_SEL_Y(bc.dst_sel[1]) - .DST_SEL_Z(bc.dst_sel[2]) - .DST_SEL_W(bc.dst_sel[3]) - .FORMAT_COMP_ALL(bc.format_comp_all) - .NUM_FORMAT_ALL(bc.num_format_all) - .SRF_MODE_ALL(bc.srf_mode_all) - .USE_CONST_FIELDS(bc.use_const_fields); - - switch (ctx.hw_class) { - case HW_CLASS_R600: - bb << VTX_WORD2_R6() - .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride) - .ENDIAN_SWAP(bc.endian_swap) - .MEGA_FETCH(bc.mega_fetch) - .OFFSET(bc.offset[0]); - break; - case HW_CLASS_R700: - bb << VTX_WORD2_R7() - .ALT_CONST(bc.alt_const) - .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride) - .ENDIAN_SWAP(bc.endian_swap) - .MEGA_FETCH(bc.mega_fetch) - .OFFSET(bc.offset[0]); - break; - case HW_CLASS_EVERGREEN: - bb << VTX_WORD2_EG() - .ALT_CONST(bc.alt_const) - .BUFFER_INDEX_MODE(bc.resource_index_mode) - 
.CONST_BUF_NO_STRIDE(bc.const_buf_no_stride) - .ENDIAN_SWAP(bc.endian_swap) - .MEGA_FETCH(bc.mega_fetch) - .OFFSET(bc.offset[0]); - break; - case HW_CLASS_CAYMAN: - bb << VTX_WORD2_CM() - .ALT_CONST(bc.alt_const) - .BUFFER_INDEX_MODE(bc.resource_index_mode) - .CONST_BUF_NO_STRIDE(bc.const_buf_no_stride) - .ENDIAN_SWAP(bc.endian_swap) - .OFFSET(bc.offset[0]); - break; - default: - assert(!"unknown hw class"); - return -1; - } - - bb << 0; - return 0; -} - -int bc_builder::build_fetch_mem(fetch_node* n) { - const bc_fetch &bc = n->bc; - ASSERTED const fetch_op_info *fop = bc.op_ptr; - - assert(fop->flags & FF_MEM); - - bb << MEM_RD_WORD0_R7EGCM() - .MEM_INST(2) - .ELEM_SIZE(bc.elem_size) - .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) - .MEM_OP(0) - .UNCACHED(bc.uncached) - .INDEXED(bc.indexed) - .SRC_SEL_Y(bc.src_sel[1]) - .SRC_GPR(bc.src_gpr) - .SRC_REL(bc.src_rel) - .SRC_SEL_X(bc.src_sel[0]) - .BURST_COUNT(bc.burst_count) - .LDS_REQ(bc.lds_req) - .COALESCED_READ(bc.coalesced_read); - - bb << MEM_RD_WORD1_R7EGCM() - .DST_GPR(bc.dst_gpr) - .DST_REL(bc.dst_rel) - .DST_SEL_X(bc.dst_sel[0]) - .DST_SEL_Y(bc.dst_sel[1]) - .DST_SEL_Z(bc.dst_sel[2]) - .DST_SEL_W(bc.dst_sel[3]) - .DATA_FORMAT(bc.data_format) - .NUM_FORMAT_ALL(bc.num_format_all) - .FORMAT_COMP_ALL(bc.format_comp_all) - .SRF_MODE_ALL(bc.srf_mode_all); - - bb << MEM_RD_WORD2_R7EGCM() - .ARRAY_BASE(bc.array_base) - .ENDIAN_SWAP(bc.endian_swap) - .ARR_SIZE(bc.array_size); - - bb << 0; - return 0; -} - -} diff --git a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp deleted file mode 100644 index b04cb73..0000000 --- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp +++ /dev/null @@ -1,689 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the 
rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include "sb_bc.h" - -namespace r600_sb { - -int bc_decoder::decode_cf(unsigned &i, bc_cf& bc) { - int r = 0; - uint32_t dw0 = dw[i]; - uint32_t dw1 = dw[i+1]; - assert(i+1 <= ndw); - - if ((dw1 >> 29) & 1) { // CF_ALU - return decode_cf_alu(i, bc); - } else { - // CF_INST field encoding on cayman is the same as on evergreen - unsigned opcode = ctx.is_egcm() ? 
- CF_WORD1_EG(dw1).get_CF_INST() : - CF_WORD1_R6R7(dw1).get_CF_INST(); - - bc.set_op(r600_isa_cf_by_opcode(ctx.isa, opcode, 0)); - - if (bc.op_ptr->flags & CF_EXP) { - return decode_cf_exp(i, bc); - } else if (bc.op_ptr->flags & CF_MEM) { - return decode_cf_mem(i, bc); - } - - if (ctx.is_egcm()) { - CF_WORD0_EGCM w0(dw0); - bc.addr = w0.get_ADDR(); - bc.jumptable_sel = w0.get_JUMPTABLE_SEL(); - - if (ctx.is_evergreen()) { - CF_WORD1_EG w1(dw1); - - bc.barrier = w1.get_BARRIER(); - bc.cf_const = w1.get_CF_CONST(); - bc.cond = w1.get_COND(); - bc.count = w1.get_COUNT(); - bc.end_of_program = w1.get_END_OF_PROGRAM(); - bc.pop_count = w1.get_POP_COUNT(); - bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - bc.whole_quad_mode = w1.get_WHOLE_QUAD_MODE(); - - } else { // cayman - CF_WORD1_CM w1(dw1); - - bc.barrier = w1.get_BARRIER(); - bc.cf_const = w1.get_CF_CONST(); - bc.cond = w1.get_COND(); - bc.count = w1.get_COUNT(); - bc.pop_count = w1.get_POP_COUNT(); - bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - } - - - } else { - CF_WORD0_R6R7 w0(dw0); - bc.addr = w0.get_ADDR(); - - CF_WORD1_R6R7 w1(dw1); - bc.barrier = w1.get_BARRIER(); - bc.cf_const = w1.get_CF_CONST(); - bc.cond = w1.get_COND(); - - if (ctx.is_r600()) - bc.count = w1.get_COUNT(); - else - bc.count = w1.get_COUNT() + (w1.get_COUNT_3() << 3); - - bc.end_of_program = w1.get_END_OF_PROGRAM(); - bc.pop_count = w1.get_POP_COUNT(); - bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - bc.whole_quad_mode = w1.get_WHOLE_QUAD_MODE(); - bc.call_count = w1.get_CALL_COUNT(); - } - } - - i += 2; - - return r; -} - -int bc_decoder::decode_cf_alu(unsigned & i, bc_cf& bc) { - int r = 0; - uint32_t dw0 = dw[i++]; - uint32_t dw1 = dw[i++]; - - assert(i <= ndw); - - CF_ALU_WORD0_ALL w0(dw0); - - bc.kc[0].bank = w0.get_KCACHE_BANK0(); - bc.kc[1].bank = w0.get_KCACHE_BANK1(); - bc.kc[0].mode = w0.get_KCACHE_MODE0(); - - bc.addr = w0.get_ADDR(); - - if (ctx.is_r600()) { - CF_ALU_WORD1_R6 w1(dw1); - - 
bc.set_op(r600_isa_cf_by_opcode(ctx.isa, w1.get_CF_INST(), 1)); - - bc.kc[0].addr = w1.get_KCACHE_ADDR0(); - bc.kc[1].mode = w1.get_KCACHE_MODE1(); - bc.kc[1].addr = w1.get_KCACHE_ADDR1(); - - bc.barrier = w1.get_BARRIER(); - bc.count = w1.get_COUNT(); - bc.whole_quad_mode = w1.get_WHOLE_QUAD_MODE(); - - bc.uses_waterfall = w1.get_USES_WATERFALL(); - } else { - CF_ALU_WORD1_R7EGCM w1(dw1); - - bc.set_op(r600_isa_cf_by_opcode(ctx.isa, w1.get_CF_INST(), 1)); - - if (bc.op == CF_OP_ALU_EXT) { - CF_ALU_WORD0_EXT_EGCM w0(dw0); - CF_ALU_WORD1_EXT_EGCM w1(dw1); - - bc.kc[0].index_mode = w0.get_KCACHE_BANK_INDEX_MODE0(); - bc.kc[1].index_mode = w0.get_KCACHE_BANK_INDEX_MODE1(); - bc.kc[2].index_mode = w0.get_KCACHE_BANK_INDEX_MODE2(); - bc.kc[3].index_mode = w0.get_KCACHE_BANK_INDEX_MODE3(); - bc.kc[2].bank = w0.get_KCACHE_BANK2(); - bc.kc[3].bank = w0.get_KCACHE_BANK3(); - bc.kc[2].mode = w0.get_KCACHE_MODE2(); - bc.kc[3].mode = w1.get_KCACHE_MODE3(); - bc.kc[2].addr = w1.get_KCACHE_ADDR2(); - bc.kc[3].addr = w1.get_KCACHE_ADDR3(); - - r = decode_cf_alu(i, bc); - - } else { - - bc.kc[0].addr = w1.get_KCACHE_ADDR0(); - bc.kc[1].mode = w1.get_KCACHE_MODE1(); - bc.kc[1].addr = w1.get_KCACHE_ADDR1(); - bc.barrier = w1.get_BARRIER(); - bc.count = w1.get_COUNT(); - bc.whole_quad_mode = w1.get_WHOLE_QUAD_MODE(); - - bc.alt_const = w1.get_ALT_CONST(); - } - } - return r; -} - -int bc_decoder::decode_cf_exp(unsigned & i, bc_cf& bc) { - int r = 0; - uint32_t dw0 = dw[i++]; - uint32_t dw1 = dw[i++]; - assert(i <= ndw); - - CF_ALLOC_EXPORT_WORD0_ALL w0(dw0); - bc.array_base = w0.get_ARRAY_BASE(); - bc.elem_size = w0.get_ELEM_SIZE(); - bc.index_gpr = w0.get_INDEX_GPR(); - bc.rw_gpr = w0.get_RW_GPR(); - bc.rw_rel = w0.get_RW_REL(); - bc.type = w0.get_TYPE(); - - if (ctx.is_evergreen()) { - CF_ALLOC_EXPORT_WORD1_SWIZ_EG w1(dw1); - bc.barrier = w1.get_BARRIER(); - bc.burst_count = w1.get_BURST_COUNT(); - bc.end_of_program = w1.get_END_OF_PROGRAM(); - bc.sel[0] = w1.get_SEL_X(); - 
bc.sel[1] = w1.get_SEL_Y(); - bc.sel[2] = w1.get_SEL_Z(); - bc.sel[3] = w1.get_SEL_W(); - bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - bc.mark = w1.get_MARK(); - - } else if (ctx.is_cayman()) { - CF_ALLOC_EXPORT_WORD1_SWIZ_CM w1(dw1); - bc.barrier = w1.get_BARRIER(); - bc.burst_count = w1.get_BURST_COUNT(); - bc.mark = w1.get_MARK(); - bc.sel[0] = w1.get_SEL_X(); - bc.sel[1] = w1.get_SEL_Y(); - bc.sel[2] = w1.get_SEL_Z(); - bc.sel[3] = w1.get_SEL_W(); - bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - - } else { // r67 - CF_ALLOC_EXPORT_WORD1_SWIZ_R6R7 w1(dw1); - bc.barrier = w1.get_BARRIER(); - bc.burst_count = w1.get_BURST_COUNT(); - bc.end_of_program = w1.get_END_OF_PROGRAM(); - bc.sel[0] = w1.get_SEL_X(); - bc.sel[1] = w1.get_SEL_Y(); - bc.sel[2] = w1.get_SEL_Z(); - bc.sel[3] = w1.get_SEL_W(); - bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - bc.whole_quad_mode = w1.get_WHOLE_QUAD_MODE(); - } - - return r; -} - - -int bc_decoder::decode_cf_mem(unsigned & i, bc_cf& bc) { - int r = 0; - uint32_t dw0 = dw[i++]; - uint32_t dw1 = dw[i++]; - assert(i <= ndw); - - if (!(bc.op_ptr->flags & CF_RAT)) { - CF_ALLOC_EXPORT_WORD0_ALL w0(dw0); - bc.array_base = w0.get_ARRAY_BASE(); - bc.elem_size = w0.get_ELEM_SIZE(); - bc.index_gpr = w0.get_INDEX_GPR(); - bc.rw_gpr = w0.get_RW_GPR(); - bc.rw_rel = w0.get_RW_REL(); - bc.type = w0.get_TYPE(); - } else { - assert(ctx.is_egcm()); - CF_ALLOC_EXPORT_WORD0_RAT_EGCM w0(dw0); - bc.elem_size = w0.get_ELEM_SIZE(); - bc.index_gpr = w0.get_INDEX_GPR(); - bc.rw_gpr = w0.get_RW_GPR(); - bc.rw_rel = w0.get_RW_REL(); - bc.type = w0.get_TYPE(); - bc.rat_id = w0.get_RAT_ID(); - bc.rat_inst = w0.get_RAT_INST(); - bc.rat_index_mode = w0.get_RAT_INDEX_MODE(); - } - - if (ctx.is_evergreen()) { - CF_ALLOC_EXPORT_WORD1_BUF_EG w1(dw1); - bc.barrier = w1.get_BARRIER(); - bc.burst_count = w1.get_BURST_COUNT(); - bc.end_of_program = w1.get_END_OF_PROGRAM(); - bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - bc.mark = w1.get_MARK(); - 
bc.array_size = w1.get_ARR_SIZE(); - bc.comp_mask = w1.get_COMP_MASK(); - - } else if (ctx.is_cayman()) { - CF_ALLOC_EXPORT_WORD1_BUF_CM w1(dw1); - bc.barrier = w1.get_BARRIER(); - bc.burst_count = w1.get_BURST_COUNT(); - bc.mark = w1.get_MARK(); - bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - bc.array_size = w1.get_ARR_SIZE(); - bc.comp_mask = w1.get_COMP_MASK(); - - } else { // r67 - CF_ALLOC_EXPORT_WORD1_BUF_R6R7 w1(dw1); - bc.barrier = w1.get_BARRIER(); - bc.burst_count = w1.get_BURST_COUNT(); - bc.end_of_program = w1.get_END_OF_PROGRAM(); - bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - bc.whole_quad_mode = w1.get_WHOLE_QUAD_MODE(); - bc.array_size = w1.get_ARR_SIZE(); - bc.comp_mask = w1.get_COMP_MASK(); - bc.whole_quad_mode = w1.get_WHOLE_QUAD_MODE(); - } - - return r; -} - -int bc_decoder::decode_alu(unsigned & i, bc_alu& bc) { - int r = 0; - uint32_t dw0 = dw[i++]; - uint32_t dw1 = dw[i++]; - assert(i <= ndw); - - ALU_WORD0_ALL w0(dw0); - bc.index_mode = w0.get_INDEX_MODE(); - bc.last = w0.get_LAST(); - bc.pred_sel = w0.get_PRED_SEL(); - bc.src[0].chan = w0.get_SRC0_CHAN(); - bc.src[0].sel = w0.get_SRC0_SEL(); - bc.src[0].neg = w0.get_SRC0_NEG(); - bc.src[0].rel = w0.get_SRC0_REL(); - bc.src[1].chan = w0.get_SRC1_CHAN(); - bc.src[1].sel = w0.get_SRC1_SEL(); - bc.src[1].neg = w0.get_SRC1_NEG(); - bc.src[1].rel = w0.get_SRC1_REL(); - - if ((dw1 >> 15) & 7) { // op3 - ALU_WORD1_OP3_ALL w1(dw1); - bc.set_op(r600_isa_alu_by_opcode(ctx.isa, w1.get_ALU_INST(), 1)); - - if (bc.op == ALU_OP3_LDS_IDX_OP) { - ALU_WORD0_LDS_IDX_OP_EGCM iw0(dw0); - ALU_WORD1_LDS_IDX_OP_EGCM iw1(dw1); - bc.index_mode = iw0.get_INDEX_MODE(); - bc.last = iw0.get_LAST(); - bc.pred_sel = iw0.get_PRED_SEL(); - bc.src[0].chan = iw0.get_SRC0_CHAN(); - bc.src[0].sel = iw0.get_SRC0_SEL(); - bc.src[0].rel = iw0.get_SRC0_REL(); - - bc.src[1].chan = iw0.get_SRC1_CHAN(); - bc.src[1].sel = iw0.get_SRC1_SEL(); - bc.src[1].rel = iw0.get_SRC1_REL(); - - bc.bank_swizzle = iw1.get_BANK_SWIZZLE(); 
- bc.src[2].chan = iw1.get_SRC2_CHAN(); - bc.src[2].sel = iw1.get_SRC2_SEL(); - bc.src[2].rel = iw1.get_SRC2_REL(); - bc.dst_chan = iw1.get_DST_CHAN(); - // TODO: clean up - for (size_t k = 0, e = r600_alu_op_table_size(); k != e; k++) { - if (((r600_alu_op_table[k].opcode[1] >> 8) & 0xff) == iw1.get_LDS_OP()) { - bc.op_ptr = &r600_alu_op_table[k]; - bc.op = k; - break; - } - } - bc.lds_idx_offset = - (iw0.get_IDX_OFFSET_4() << 4) | - (iw0.get_IDX_OFFSET_5() << 5) | - (iw1.get_IDX_OFFSET_1() << 1) | - (iw1.get_IDX_OFFSET_0() << 0) | - (iw1.get_IDX_OFFSET_2() << 2) | - (iw1.get_IDX_OFFSET_3() << 3); - } - else { - bc.bank_swizzle = w1.get_BANK_SWIZZLE(); - bc.clamp = w1.get_CLAMP(); - bc.dst_chan = w1.get_DST_CHAN(); - bc.dst_gpr = w1.get_DST_GPR(); - bc.dst_rel = w1.get_DST_REL(); - - bc.src[2].chan = w1.get_SRC2_CHAN(); - bc.src[2].sel = w1.get_SRC2_SEL(); - bc.src[2].neg = w1.get_SRC2_NEG(); - bc.src[2].rel = w1.get_SRC2_REL(); - } - - } else { // op2 - if (ctx.is_r600()) { - ALU_WORD1_OP2_R6 w1(dw1); - bc.set_op(r600_isa_alu_by_opcode(ctx.isa, w1.get_ALU_INST(), 0)); - - bc.bank_swizzle = w1.get_BANK_SWIZZLE(); - bc.clamp = w1.get_CLAMP(); - bc.dst_chan = w1.get_DST_CHAN(); - bc.dst_gpr = w1.get_DST_GPR(); - bc.dst_rel = w1.get_DST_REL(); - - bc.omod = w1.get_OMOD(); - bc.src[0].abs = w1.get_SRC0_ABS(); - bc.src[1].abs = w1.get_SRC1_ABS(); - bc.write_mask = w1.get_WRITE_MASK(); - bc.update_exec_mask = w1.get_UPDATE_EXEC_MASK(); - bc.update_pred = w1.get_UPDATE_PRED(); - - bc.fog_merge = w1.get_FOG_MERGE(); - - } else { - ALU_WORD1_OP2_R7EGCM w1(dw1); - bc.set_op(r600_isa_alu_by_opcode(ctx.isa, w1.get_ALU_INST(), 0)); - - bc.bank_swizzle = w1.get_BANK_SWIZZLE(); - bc.clamp = w1.get_CLAMP(); - bc.dst_chan = w1.get_DST_CHAN(); - bc.dst_gpr = w1.get_DST_GPR(); - bc.dst_rel = w1.get_DST_REL(); - - bc.omod = w1.get_OMOD(); - bc.src[0].abs = w1.get_SRC0_ABS(); - bc.src[1].abs = w1.get_SRC1_ABS(); - bc.write_mask = w1.get_WRITE_MASK(); - bc.update_exec_mask = 
w1.get_UPDATE_EXEC_MASK(); - bc.update_pred = w1.get_UPDATE_PRED(); - } - } - - bc.slot_flags = (alu_op_flags)bc.op_ptr->slots[ctx.isa->hw_class]; - return r; -} - -int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) { - int r = 0; - uint32_t dw0 = dw[i]; - uint32_t dw1 = dw[i+1]; - uint32_t dw2 = dw[i+2]; - assert(i + 4 <= ndw); - - unsigned fetch_opcode = dw0 & 0x1F; - - if (fetch_opcode == 2) { // MEM_INST_MEM - unsigned mem_op = (dw0 >> 8) & 0x7; - unsigned gds_op; - if (mem_op == 0 || mem_op == 2) { - fetch_opcode = mem_op == 0 ? FETCH_OP_READ_SCRATCH : FETCH_OP_READ_MEM; - } else if (mem_op == 4) { - gds_op = (dw1 >> 9) & 0x1f; - if ((dw1 >> 9) & 0x20) - fetch_opcode = FETCH_OP_GDS_ADD_RET + gds_op; - else - fetch_opcode = FETCH_OP_GDS_ADD + gds_op; - } else if (mem_op == 5) - fetch_opcode = FETCH_OP_TF_WRITE; - bc.set_op(fetch_opcode); - } else - bc.set_op(r600_isa_fetch_by_opcode(ctx.isa, fetch_opcode)); - - if (bc.op_ptr->flags & FF_MEM) - return decode_fetch_mem(i, bc); - - if (bc.op_ptr->flags & FF_GDS) - return decode_fetch_gds(i, bc); - - if (bc.op_ptr->flags & FF_VTX) - return decode_fetch_vtx(i, bc); - - // tex - - if (ctx.is_r600()) { - TEX_WORD0_R6 w0(dw0); - - bc.bc_frac_mode = w0.get_BC_FRAC_MODE(); - bc.fetch_whole_quad = w0.get_FETCH_WHOLE_QUAD(); - bc.resource_id = w0.get_RESOURCE_ID(); - bc.src_gpr = w0.get_SRC_GPR(); - bc.src_rel = w0.get_SRC_REL(); - - } else if (ctx.is_r600()) { - TEX_WORD0_R7 w0(dw0); - - bc.bc_frac_mode = w0.get_BC_FRAC_MODE(); - bc.fetch_whole_quad = w0.get_FETCH_WHOLE_QUAD(); - bc.resource_id = w0.get_RESOURCE_ID(); - bc.src_gpr = w0.get_SRC_GPR(); - bc.src_rel = w0.get_SRC_REL(); - bc.alt_const = w0.get_ALT_CONST(); - - } else { // eg/cm - TEX_WORD0_EGCM w0(dw0); - - bc.fetch_whole_quad = w0.get_FETCH_WHOLE_QUAD(); - bc.resource_id = w0.get_RESOURCE_ID(); - bc.src_gpr = w0.get_SRC_GPR(); - bc.src_rel = w0.get_SRC_REL(); - bc.alt_const = w0.get_ALT_CONST(); - bc.inst_mod = w0.get_INST_MOD(); - 
bc.resource_index_mode = w0.get_RESOURCE_INDEX_MODE(); - bc.sampler_index_mode = w0.get_SAMPLER_INDEX_MODE(); - } - - TEX_WORD1_ALL w1(dw1); - bc.coord_type[0] = w1.get_COORD_TYPE_X(); - bc.coord_type[1] = w1.get_COORD_TYPE_Y(); - bc.coord_type[2] = w1.get_COORD_TYPE_Z(); - bc.coord_type[3] = w1.get_COORD_TYPE_W(); - bc.dst_gpr = w1.get_DST_GPR(); - bc.dst_rel = w1.get_DST_REL(); - bc.dst_sel[0] = w1.get_DST_SEL_X(); - bc.dst_sel[1] = w1.get_DST_SEL_Y(); - bc.dst_sel[2] = w1.get_DST_SEL_Z(); - bc.dst_sel[3] = w1.get_DST_SEL_W(); - bc.lod_bias = w1.get_LOD_BIAS(); - - TEX_WORD2_ALL w2(dw2); - bc.offset[0] = w2.get_OFFSET_X(); - bc.offset[1] = w2.get_OFFSET_Y(); - bc.offset[2] = w2.get_OFFSET_Z(); - bc.sampler_id = w2.get_SAMPLER_ID(); - bc.src_sel[0] = w2.get_SRC_SEL_X(); - bc.src_sel[1] = w2.get_SRC_SEL_Y(); - bc.src_sel[2] = w2.get_SRC_SEL_Z(); - bc.src_sel[3] = w2.get_SRC_SEL_W(); - - i += 4; - return r; -} - -int bc_decoder::decode_fetch_gds(unsigned & i, bc_fetch& bc) { - int r = 0; - uint32_t dw0 = dw[i]; - uint32_t dw1 = dw[i+1]; - uint32_t dw2 = dw[i+2]; - uint32_t tmp; - /* GDS instructions align to 4 words boundaries */ - i+= 4; - assert(i <= ndw); - - MEM_GDS_WORD0_EGCM w0(dw0); - bc.src_gpr = w0.get_SRC_GPR(); - tmp = w0.get_SRC_REL_MODE(); - bc.src_rel_global = (tmp == 2); - bc.src_sel[0] = w0.get_SRC_SEL_X(); - bc.src_sel[1] = w0.get_SRC_SEL_Y(); - bc.src_sel[2] = w0.get_SRC_SEL_Z(); - - MEM_GDS_WORD1_EGCM w1(dw1); - bc.dst_gpr = w1.get_DST_GPR(); - tmp = w1.get_DST_REL_MODE(); - bc.dst_rel_global = (tmp == 2); - bc.src2_gpr = w1.get_SRC_GPR(); - bc.alloc_consume = w1.get_ALLOC_CONSUME(); - bc.uav_id = w1.get_UAV_ID(); - bc.uav_index_mode = w1.get_UAV_INDEX_MODE(); - bc.bcast_first_req = w1.get_BCAST_FIRST_REQ(); - - MEM_GDS_WORD2_EGCM w2(dw2); - bc.dst_sel[0] = w2.get_DST_SEL_X(); - bc.dst_sel[1] = w2.get_DST_SEL_Y(); - bc.dst_sel[2] = w2.get_DST_SEL_Z(); - bc.dst_sel[3] = w2.get_DST_SEL_W(); - return r; -} - -int bc_decoder::decode_fetch_mem(unsigned 
& i, bc_fetch& bc) { - int r = 0; - uint32_t dw0 = dw[i]; - uint32_t dw1 = dw[i+1]; - uint32_t dw2 = dw[i+2]; - - i += 4; // MEM instructions align to 4 words boundaries - - assert(i <= ndw); - - MEM_RD_WORD0_R7EGCM w0(dw0); - bc.elem_size = w0.get_ELEM_SIZE(); - bc.fetch_whole_quad = w0.get_FETCH_WHOLE_QUAD(); - bc.uncached = w0.get_UNCACHED(); - bc.indexed = w0.get_INDEXED(); - bc.src_sel[1] = w0.get_SRC_SEL_Y(); - bc.src_gpr = w0.get_SRC_GPR(); - bc.src_rel = w0.get_SRC_REL(); - bc.src_sel[0] = w0.get_SRC_SEL_X(); - bc.burst_count = w0.get_BURST_COUNT(); - bc.lds_req = w0.get_LDS_REQ(); - bc.coalesced_read = w0.get_COALESCED_READ(); - - MEM_RD_WORD1_R7EGCM w1(dw1); - bc.dst_gpr = w1.get_DST_GPR(); - bc.dst_rel = w1.get_DST_REL(); - bc.dst_sel[0] = w1.get_DST_SEL_X(); - bc.dst_sel[1] = w1.get_DST_SEL_Y(); - bc.dst_sel[2] = w1.get_DST_SEL_Z(); - bc.dst_sel[3] = w1.get_DST_SEL_W(); - bc.data_format = w1.get_DATA_FORMAT(); - bc.num_format_all = w1.get_NUM_FORMAT_ALL(); - bc.format_comp_all = w1.get_FORMAT_COMP_ALL(); - bc.srf_mode_all = w1.get_SRF_MODE_ALL(); - - MEM_RD_WORD2_R7EGCM w2(dw2); - bc.array_base = w2.get_ARRAY_BASE(); - bc.endian_swap = w2.get_ENDIAN_SWAP(); - bc.array_size = w2.get_ARR_SIZE(); - - return r; -} - -int bc_decoder::decode_fetch_vtx(unsigned & i, bc_fetch& bc) { - int r = 0; - uint32_t dw0 = dw[i]; - uint32_t dw1 = dw[i+1]; - uint32_t dw2 = dw[i+2]; - i+= 4; - assert(i <= ndw); - - if (ctx.is_cayman()) { - VTX_WORD0_CM w0(dw0); - bc.resource_id = w0.get_BUFFER_ID(); - bc.fetch_type = w0.get_FETCH_TYPE(); - bc.fetch_whole_quad = w0.get_FETCH_WHOLE_QUAD(); - bc.src_gpr = w0.get_SRC_GPR(); - bc.src_rel = w0.get_SRC_REL(); - bc.src_sel[0] = w0.get_SRC_SEL_X(); - bc.coalesced_read = w0.get_COALESCED_READ(); - bc.lds_req = w0.get_LDS_REQ(); - bc.structured_read = w0.get_STRUCTURED_READ(); - - } else { - VTX_WORD0_R6R7EG w0(dw0); - bc.resource_id = w0.get_BUFFER_ID(); - bc.fetch_type = w0.get_FETCH_TYPE(); - bc.fetch_whole_quad = 
w0.get_FETCH_WHOLE_QUAD(); - bc.mega_fetch_count = w0.get_MEGA_FETCH_COUNT(); - bc.src_gpr = w0.get_SRC_GPR(); - bc.src_rel = w0.get_SRC_REL(); - bc.src_sel[0] = w0.get_SRC_SEL_X(); - } - - if (bc.op == FETCH_OP_SEMFETCH) { - VTX_WORD1_SEM_ALL w1(dw1); - bc.data_format = w1.get_DATA_FORMAT(); - bc.dst_sel[0] = w1.get_DST_SEL_X(); - bc.dst_sel[1] = w1.get_DST_SEL_Y(); - bc.dst_sel[2] = w1.get_DST_SEL_Z(); - bc.dst_sel[3] = w1.get_DST_SEL_W(); - bc.format_comp_all = w1.get_FORMAT_COMP_ALL(); - bc.num_format_all = w1.get_NUM_FORMAT_ALL(); - bc.srf_mode_all = w1.get_SRF_MODE_ALL(); - bc.use_const_fields = w1.get_USE_CONST_FIELDS(); - - bc.semantic_id = w1.get_SEMANTIC_ID(); - - } else { - VTX_WORD1_GPR_ALL w1(dw1); - bc.data_format = w1.get_DATA_FORMAT(); - bc.dst_sel[0] = w1.get_DST_SEL_X(); - bc.dst_sel[1] = w1.get_DST_SEL_Y(); - bc.dst_sel[2] = w1.get_DST_SEL_Z(); - bc.dst_sel[3] = w1.get_DST_SEL_W(); - bc.format_comp_all = w1.get_FORMAT_COMP_ALL(); - bc.num_format_all = w1.get_NUM_FORMAT_ALL(); - bc.srf_mode_all = w1.get_SRF_MODE_ALL(); - bc.use_const_fields = w1.get_USE_CONST_FIELDS(); - - bc.dst_gpr = w1.get_DST_GPR(); - bc.dst_rel = w1.get_DST_REL(); - } - - switch (ctx.hw_class) { - case HW_CLASS_R600: - { - VTX_WORD2_R6 w2(dw2); - bc.const_buf_no_stride = w2.get_CONST_BUF_NO_STRIDE(); - bc.endian_swap = w2.get_ENDIAN_SWAP(); - bc.mega_fetch = w2.get_MEGA_FETCH(); - bc.offset[0] = w2.get_OFFSET(); - break; - } - case HW_CLASS_R700: - { - VTX_WORD2_R7 w2(dw2); - bc.const_buf_no_stride = w2.get_CONST_BUF_NO_STRIDE(); - bc.endian_swap = w2.get_ENDIAN_SWAP(); - bc.mega_fetch = w2.get_MEGA_FETCH(); - bc.offset[0] = w2.get_OFFSET(); - bc.alt_const = w2.get_ALT_CONST(); - break; - } - case HW_CLASS_EVERGREEN: - { - VTX_WORD2_EG w2(dw2); - bc.const_buf_no_stride = w2.get_CONST_BUF_NO_STRIDE(); - bc.endian_swap = w2.get_ENDIAN_SWAP(); - bc.mega_fetch = w2.get_MEGA_FETCH(); - bc.offset[0] = w2.get_OFFSET(); - bc.alt_const = w2.get_ALT_CONST(); - bc.resource_index_mode = 
w2.get_BUFFER_INDEX_MODE(); - break; - } - case HW_CLASS_CAYMAN: - { - VTX_WORD2_CM w2(dw2); - bc.const_buf_no_stride = w2.get_CONST_BUF_NO_STRIDE(); - bc.endian_swap = w2.get_ENDIAN_SWAP(); - bc.offset[0] = w2.get_OFFSET(); - bc.alt_const = w2.get_ALT_CONST(); - bc.resource_index_mode = w2.get_BUFFER_INDEX_MODE(); - break; - } - default: - assert(!"unknown hw class"); - return -1; - } - - return r; -} - -} diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp deleted file mode 100644 index a7c4ccd..0000000 --- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp +++ /dev/null @@ -1,589 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" -#include "eg_sq.h" // V_SQ_CF_INDEX_0/1 - -namespace r600_sb { - -static const char* chans = "xyzw01?_"; - -static const char* vec_bs[] = { - "VEC_012", "VEC_021", "VEC_120", "VEC_102", "VEC_201", "VEC_210" -}; - -static const char* scl_bs[] = { - "SCL_210", "SCL_122", "SCL_212", "SCL_221" -}; - - -bool bc_dump::visit(cf_node& n, bool enter) { - if (enter) { - - id = n.bc.id << 1; - - if ((n.bc.op_ptr->flags & CF_ALU) && n.bc.is_alu_extended()) { - dump_dw(id, 2); - id += 2; - sblog << "\n"; - } - - dump_dw(id, 2); - dump(n); - - if (n.bc.op_ptr->flags & CF_CLAUSE) { - id = n.bc.addr << 1; - new_group = 1; - } - } - return true; -} - -bool bc_dump::visit(alu_node& n, bool enter) { - if (enter) { - sblog << " "; - dump_dw(id, 2); - - if (new_group) { - sblog.print_w(++group_index, 5); - sblog << " "; - } else - sblog << " "; - - dump(n); - id += 2; - - new_group = n.bc.last; - } else { - if (n.bc.last) { - alu_group_node *g = - static_cast(n.get_alu_group_node()); - assert(g); - for (unsigned k = 0; k < g->literals.size(); ++k) { - sblog << " "; - dump_dw(id, 1); - id += 1; - sblog << "\n"; - } - - id = (id + 1) & ~1u; - } - } - - return false; -} - -bool bc_dump::visit(fetch_node& n, bool enter) { - if (enter) { - sblog << " "; - dump_dw(id, 3); - dump(n); - id += 4; - } - return false; -} - -static void fill_to(sb_ostringstream &s, int pos) { - int l = s.str().length(); - if (l < pos) - s << std::string(pos-l, ' '); -} - -void bc_dump::dump(cf_node& n) { - sb_ostringstream s; - s << n.bc.op_ptr->name; - - if (n.bc.op_ptr->flags & CF_EXP) { - static const char *exp_type[] = {"PIXEL", "POS ", "PARAM"}; - - fill_to(s, 18); - s << " " << exp_type[n.bc.type] << " "; - - if (n.bc.burst_count) { - sb_ostringstream s2; - s2 << n.bc.array_base << "-" << n.bc.array_base + n.bc.burst_count; - s.print_wl(s2.str(), 5); - s << " R" << n.bc.rw_gpr << "-" << - n.bc.rw_gpr + 
n.bc.burst_count << "."; - } else { - s.print_wl(n.bc.array_base, 5); - s << " R" << n.bc.rw_gpr << "."; - } - - for (int k = 0; k < 4; ++k) - s << chans[n.bc.sel[k]]; - - } else if (n.bc.op_ptr->flags & CF_MEM) { - static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", - "WRITE_IND_ACK"}; - static const char *exp_type_r600[] = {"WRITE", "WRITE_IND", - "READ", "READ_IND"}; - - fill_to(s, 18); - if (ctx.hw_class == HW_CLASS_R600 && n.bc.op == CF_OP_MEM_SCRATCH) - s << " " << exp_type_r600[n.bc.type] << " "; - else - s << " " << exp_type[n.bc.type] << " "; - s.print_wl(n.bc.array_base, 5); - s << " R" << n.bc.rw_gpr << "."; - for (int k = 0; k < 4; ++k) - s << ((n.bc.comp_mask & (1 << k)) ? chans[k] : '_'); - - if ((n.bc.op_ptr->flags & CF_RAT) && (n.bc.type & 1)) { - s << ", @R" << n.bc.index_gpr << ".xyz"; - } - if ((n.bc.op_ptr->flags & CF_MEM) && (n.bc.type & 1)) { - s << ", @R" << n.bc.index_gpr << ".x"; - } - - s << " ES:" << n.bc.elem_size; - - s << " OP:" << n.bc.rat_inst; - - if (n.bc.mark) - s << " MARK"; - - } else { - - if (n.bc.op_ptr->flags & CF_CLAUSE) { - s << " " << n.bc.count+1; - } - - s << " @" << (n.bc.addr << 1); - - if (n.bc.op_ptr->flags & CF_ALU) { - static const char *index_mode[] = {"", " CF_INDEX_0", " CF_INDEX_1"}; - - for (int k = 0; k < 4; ++k) { - bc_kcache &kc = n.bc.kc[k]; - if (kc.mode) { - s << " KC" << k << "[CB" << kc.bank << ":" << - (kc.addr << 4) << "-" << - (((kc.addr + kc.mode) << 4) - 1) << index_mode[kc.index_mode] << "]"; - } - } - } - - if (n.bc.cond) - s << " CND:" << n.bc.cond; - - if (n.bc.pop_count) - s << " POP:" << n.bc.pop_count; - - if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT)) - s << " STREAM" << n.bc.count; - } - - if (!n.bc.barrier) - s << " NO_BARRIER"; - - if (n.bc.valid_pixel_mode) - s << " VPM"; - - if (n.bc.whole_quad_mode) - s << " WQM"; - - if (n.bc.end_of_program) - s << " EOP"; - - sblog << s.str() << "\n"; -} - - -static void print_sel(sb_ostream &s, int sel, int rel, int index_mode, 
- int need_brackets) { - if (rel && index_mode >= 5 && sel < 128) - s << "G"; - if (rel || need_brackets) { - s << "["; - } - s << sel; - if (rel) { - if (index_mode == 0 || index_mode == 6) - s << "+AR"; - else if (index_mode == 4) - s << "+AL"; - } - if (rel || need_brackets) { - s << "]"; - } -} - -static void print_dst(sb_ostream &s, bc_alu &alu) -{ - unsigned sel = alu.dst_gpr; - char reg_char = 'R'; - if (sel >= 128 - 4) { // clause temporary gpr - sel -= 128 - 4; - reg_char = 'T'; - } - - if (alu.write_mask || (alu.op_ptr->src_count == 3 && alu.op < LDS_OP2_LDS_ADD)) { - s << reg_char; - print_sel(s, sel, alu.dst_rel, alu.index_mode, 0); - } else { - s << "__"; - } - s << "."; - s << chans[alu.dst_chan]; -} - -static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx) -{ - bc_alu_src *src = &alu.src[idx]; - unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0; - - if (src->neg) - s <<"-"; - if (src->abs) - s <<"|"; - - if (sel < 128 - 4) { - s << "R"; - } else if (sel < 128) { - s << "T"; - sel -= 128 - 4; - } else if (sel < 160) { - s << "KC0"; - need_brackets = 1; - sel -= 128; - } else if (sel < 192) { - s << "KC1"; - need_brackets = 1; - sel -= 160; - } else if (sel >= 448) { - s << "Param"; - sel -= 448; - } else if (sel >= 288) { - s << "KC3"; - need_brackets = 1; - sel -= 288; - } else if (sel >= 256) { - s << "KC2"; - need_brackets = 1; - sel -= 256; - } else { - need_sel = 0; - need_chan = 0; - switch (sel) { - case ALU_SRC_LDS_OQ_A: - s << "LDS_OQ_A"; - need_chan = 1; - break; - case ALU_SRC_LDS_OQ_B: - s << "LDS_OQ_B"; - need_chan = 1; - break; - case ALU_SRC_LDS_OQ_A_POP: - s << "LDS_OQ_A_POP"; - need_chan = 1; - break; - case ALU_SRC_LDS_OQ_B_POP: - s << "LDS_OQ_B_POP"; - need_chan = 1; - break; - case ALU_SRC_LDS_DIRECT_A: - s << "LDS_A["; s.print_zw_hex(src->value.u, 8); s << "]"; - break; - case ALU_SRC_LDS_DIRECT_B: - s << "LDS_B["; s.print_zw_hex(src->value.u, 8); s << "]"; - break; - case ALU_SRC_PS: - s << "PS"; - 
break; - case ALU_SRC_PV: - s << "PV"; - need_chan = 1; - break; - case ALU_SRC_LITERAL: - s << "[0x"; - s.print_zw_hex(src->value.u, 8); - s << " " << src->value.f << "]"; - need_chan = 1; - break; - case ALU_SRC_0_5: - s << "0.5"; - break; - case ALU_SRC_M_1_INT: - s << "-1"; - break; - case ALU_SRC_1_INT: - s << "1"; - break; - case ALU_SRC_1: - s << "1.0"; - break; - case ALU_SRC_0: - s << "0"; - break; - case ALU_SRC_TIME_LO: - s << "TIME_LO"; - break; - case ALU_SRC_TIME_HI: - s << "TIME_HI"; - break; - case ALU_SRC_MASK_LO: - s << "MASK_LO"; - break; - case ALU_SRC_MASK_HI: - s << "MASK_HI"; - break; - case ALU_SRC_HW_WAVE_ID: - s << "HW_WAVE_ID"; - break; - case ALU_SRC_SIMD_ID: - s << "SIMD_ID"; - break; - case ALU_SRC_SE_ID: - s << "SE_ID"; - break; - default: - s << "??IMM_" << sel; - break; - } - } - - if (need_sel) - print_sel(s, sel, src->rel, alu.index_mode, need_brackets); - - if (need_chan) { - s << "." << chans[src->chan]; - } - - if (src->abs) - s << "|"; -} -void bc_dump::dump(alu_node& n) { - sb_ostringstream s; - static const char *omod_str[] = {"","*2","*4","/2"}; - static const char *slots = "xyzwt"; - - s << (n.bc.update_exec_mask ? "M" : " "); - s << (n.bc.update_pred ? "P" : " "); - s << " "; - s << (n.bc.pred_sel>=2 ? (n.bc.pred_sel == 2 ? "0" : "1") : " "); - s << " "; - - s << slots[n.bc.slot] << ": "; - - s << n.bc.op_ptr->name << omod_str[n.bc.omod] << (n.bc.clamp ? "_sat" : ""); - fill_to(s, 26); - s << " "; - - print_dst(s, n.bc); - for (int k = 0; k < n.bc.op_ptr->src_count; ++k) { - s << (k ? 
", " : ", "); - print_src(s, n.bc, k); - } - - if (n.bc.bank_swizzle) { - fill_to(s, 55); - if (n.bc.slot == SLOT_TRANS) - s << " " << scl_bs[n.bc.bank_swizzle]; - else - s << " " << vec_bs[n.bc.bank_swizzle]; - } - - if (ctx.is_cayman()) { - if (n.bc.op == ALU_OP1_MOVA_INT) { - static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1", - " Unknown MOVA_INT dest" }; - s << mova_str[std::min(n.bc.dst_gpr, 4u)]; // CM_V_SQ_MOVA_DST_AR_* - } - } - - if (n.bc.lds_idx_offset) { - s << " IDX_OFFSET:" << n.bc.lds_idx_offset; - } - - sblog << s.str() << "\n"; -} - -int bc_dump::init() { - sb_ostringstream s; - s << "===== SHADER #" << sh.id; - - if (sh.optimized) - s << " OPT"; - - s << " "; - - std::string target = std::string(" ") + - sh.get_full_target_name() + " ====="; - - while (s.str().length() + target.length() < 80) - s << "="; - - s << target; - - sblog << "\n" << s.str() << "\n"; - - s.clear(); - - if (bc_data) { - s << "===== " << ndw << " dw ===== " << sh.ngpr - << " gprs ===== " << sh.nstack << " stack "; - } - - while (s.str().length() < 80) - s << "="; - - sblog << s.str() << "\n"; - - return 0; -} - -int bc_dump::done() { - sb_ostringstream s; - s << "===== SHADER_END "; - - while (s.str().length() < 80) - s << "="; - - sblog << s.str() << "\n\n"; - - return 0; -} - -bc_dump::bc_dump(shader& s, bytecode* bc) : - vpass(s), bc_data(), ndw(), id(), - new_group(), group_index() { - - if (bc) { - bc_data = bc->data(); - ndw = bc->ndw(); - } -} - -void bc_dump::dump(fetch_node& n) { - sb_ostringstream s; - static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""}; - unsigned gds = n.bc.op_ptr->flags & FF_GDS; - bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET && - n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET; - bool show_dst = !gds || (gds && gds_has_ret); - - s << n.bc.op_ptr->name; - fill_to(s, 20); - - if (show_dst) { - s << "R"; - print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0); - s << "."; - for (int k = 0; k < 4; ++k) - s << 
chans[n.bc.dst_sel[k]]; - s << ", "; - } - - s << "R"; - print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0); - s << "."; - - unsigned vtx = n.bc.op_ptr->flags & FF_VTX; - unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4; - - for (unsigned k = 0; k < num_src_comp; ++k) - s << chans[n.bc.src_sel[k]]; - - if (vtx && n.bc.offset[0]) { - s << " + " << n.bc.offset[0] << "b "; - } - - if (!gds) - s << ", RID:" << n.bc.resource_id; - - if (gds) { - s << " UAV:" << n.bc.uav_id; - if (n.bc.uav_index_mode) - s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0); - if (n.bc.bcast_first_req) - s << " BFQ"; - if (n.bc.alloc_consume) - s << " AC"; - } else if (vtx) { - s << " " << fetch_type[n.bc.fetch_type]; - if (!ctx.is_cayman() && n.bc.mega_fetch_count) - s << " MFC:" << n.bc.mega_fetch_count; - if (n.bc.fetch_whole_quad) - s << " FWQ"; - if (ctx.is_egcm() && n.bc.resource_index_mode) - s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0); - if (ctx.is_egcm() && n.bc.sampler_index_mode) - s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0); - - s << " UCF:" << n.bc.use_const_fields - << " FMT(DTA:" << n.bc.data_format - << " NUM:" << n.bc.num_format_all - << " COMP:" << n.bc.format_comp_all - << " MODE:" << n.bc.srf_mode_all << ")"; - } else { - s << ", SID:" << n.bc.sampler_id; - if (n.bc.lod_bias) - s << " LB:" << n.bc.lod_bias; - s << " CT:"; - for (unsigned k = 0; k < 4; ++k) - s << (n.bc.coord_type[k] ? 
"N" : "U"); - for (unsigned k = 0; k < 3; ++k) - if (n.bc.offset[k]) - s << " O" << chans[k] << ":" << n.bc.offset[k]; - if (ctx.is_egcm() && n.bc.resource_index_mode) - s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0); - if (ctx.is_egcm() && n.bc.sampler_index_mode) - s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0); - } - - if (n.bc.op_ptr->flags & FF_MEM) { - s << ", ELEM_SIZE:" << n.bc.elem_size; - if (n.bc.uncached) - s << ", UNCACHED"; - if (n.bc.indexed) - s << ", INDEXED"; - if (n.bc.burst_count) - s << ", BURST_COUNT:" << n.bc.burst_count; - s << ", ARRAY_BASE:" << n.bc.array_base; - s << ", ARRAY_SIZE:" << n.bc.array_size; - } - - sblog << s.str() << "\n"; -} - -void bc_dump::dump_dw(unsigned dw_id, unsigned count) { - if (!bc_data) - return; - - assert(dw_id + count <= ndw); - - sblog.print_zw(dw_id, 4); - sblog << " "; - while (count--) { - sblog.print_zw_hex(bc_data[dw_id++], 8); - sblog << " "; - } -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp deleted file mode 100644 index b3ea2f3..0000000 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ /dev/null @@ -1,1007 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#define FBC_DEBUG 0 - -#if FBC_DEBUG -#define FBC_DUMP(q) do { q } while (0) -#else -#define FBC_DUMP(q) -#endif - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) { - - alu_group_node *g = sh.create_alu_group(); - alu_node *a = sh.create_alu(); - - a->bc.set_op(ALU_OP0_NOP); - a->bc.last = 1; - - g->push_back(a); - b4->insert_before(g); -} - -int bc_finalizer::run() { - - run_on(sh.root); - - regions_vec &rv = sh.get_regions(); - for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E; - ++I) { - region_node *r = *I; - - assert(r); - - bool loop = r->is_loop(); - - if (loop) - finalize_loop(r); - else - finalize_if(r); - - r->expand(); - } - - cf_peephole(); - - // workaround for some problems on r6xx/7xx - // add ALU NOP to each vertex shader - if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) { - cf_node *c = sh.create_clause(NST_ALU_CLAUSE); - - alu_group_node *g = sh.create_alu_group(); - - alu_node *a = sh.create_alu(); - a->bc.set_op(ALU_OP0_NOP); - a->bc.last = 1; - - g->push_back(a); - c->push_back(g); - - sh.root->push_back(c); - - c = sh.create_cf(CF_OP_NOP); - sh.root->push_back(c); - - last_cf = c; - } - - if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) { - last_cf = sh.create_cf(CF_OP_NOP); - sh.root->push_back(last_cf); - } - - if (ctx.is_cayman()) { - if 
(!last_cf) { - cf_node *c = sh.create_cf(CF_OP_CF_END); - sh.root->push_back(c); - } else - last_cf->insert_after(sh.create_cf(CF_OP_CF_END)); - } else - last_cf->bc.end_of_program = 1; - - for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) { - cf_node *le = last_export[t]; - if (le) - le->bc.set_op(CF_OP_EXPORT_DONE); - } - - sh.ngpr = ngpr; - sh.nstack = nstack; - return 0; -} - -void bc_finalizer::finalize_loop(region_node* r) { - - update_nstack(r); - - cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10); - cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END); - - // Update last_cf, but don't overwrite it if it's outside the current loop nest since - // it may point to a cf that is later in program order. - // The single parent level check is sufficient since finalize_loop() is processed in - // reverse order from innermost to outermost loop nest level. - if (!last_cf || last_cf->get_parent_region() == r) { - last_cf = loop_end; - } - - loop_start->jump_after(loop_end); - loop_end->jump_after(loop_start); - - for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end(); - I != E; ++I) { - depart_node *dep = *I; - cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK); - loop_break->jump(loop_end); - dep->push_back(loop_break); - dep->expand(); - } - - // FIXME produces unnecessary LOOP_CONTINUE - for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end(); - I != E; ++I) { - repeat_node *rep = *I; - if (!(rep->parent == r && rep->prev == NULL)) { - cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE); - loop_cont->jump(loop_end); - rep->push_back(loop_cont); - } - rep->expand(); - } - - r->push_front(loop_start); - r->push_back(loop_end); -} - -void bc_finalizer::finalize_if(region_node* r) { - - update_nstack(r); - - // expecting the following control flow structure here: - // - region - // { - // - depart/repeat 1 (it may be depart/repeat for some outer region) - // { - // - if - // { - // - depart/repeat 2 (possibly for outer 
region) - // { - // - some optional code - // } - // } - // - optional code> ... - // } - // } - - container_node *repdep1 = static_cast(r->first); - assert(repdep1->is_depart() || repdep1->is_repeat()); - - if_node *n_if = static_cast(repdep1->first); - - if (n_if) { - - - assert(n_if->is_if()); - - ASSERTED container_node *repdep2 = static_cast(n_if->first); - assert(repdep2->is_depart() || repdep2->is_repeat()); - - cf_node *if_jump = sh.create_cf(CF_OP_JUMP); - cf_node *if_pop = sh.create_cf(CF_OP_POP); - - if (!last_cf || last_cf->get_parent_region() == r) { - last_cf = if_pop; - } - if_pop->bc.pop_count = 1; - if_pop->jump_after(if_pop); - - r->push_front(if_jump); - r->push_back(if_pop); - - /* the depart/repeat 1 is actually part of the "else" code. - * if it's a depart for an outer loop region it will want to - * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need - * to emit the else clause. - */ - bool has_else = n_if->next; - - if (repdep1->is_depart()) { - depart_node *dep1 = static_cast(repdep1); - if (dep1->target != r && dep1->target->is_loop()) - has_else = true; - } - - if (repdep1->is_repeat()) { - repeat_node *rep1 = static_cast(repdep1); - if (rep1->target != r && rep1->target->is_loop()) - has_else = true; - } - - if (has_else) { - cf_node *nelse = sh.create_cf(CF_OP_ELSE); - n_if->insert_after(nelse); - if_jump->jump(nelse); - nelse->jump_after(if_pop); - nelse->bc.pop_count = 1; - - } else { - if_jump->jump_after(if_pop); - if_jump->bc.pop_count = 1; - } - - n_if->expand(); - } - - for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end(); - I != E; ++I) { - (*I)->expand(); - } - r->departs.clear(); - assert(r->repeats.empty()); -} - -void bc_finalizer::run_on(container_node* c) { - node *prev_node = NULL; - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - node *n = *I; - - if (n->is_alu_group()) { - finalize_alu_group(static_cast(n), prev_node); - } else { - if (n->is_alu_clause()) { - cf_node *c = 
static_cast(n); - - if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) { - if (ctx.stack_workaround_8xx) { - region_node *r = c->get_parent_region(); - if (r) { - unsigned ifs, loops; - unsigned elems = get_stack_depth(r, loops, ifs); - unsigned dmod1 = elems % ctx.stack_entry_size; - unsigned dmod2 = (elems + 1) % ctx.stack_entry_size; - - if (elems && (!dmod1 || !dmod2)) - c->flags |= NF_ALU_STACK_WORKAROUND; - } - } else if (ctx.stack_workaround_9xx) { - region_node *r = c->get_parent_region(); - if (r) { - unsigned ifs, loops; - get_stack_depth(r, loops, ifs); - if (loops >= 2) - c->flags |= NF_ALU_STACK_WORKAROUND; - } - } - } - last_cf = c; - } else if (n->is_fetch_inst()) { - finalize_fetch(static_cast(n)); - } else if (n->is_cf_inst()) { - finalize_cf(static_cast(n)); - } - if (n->is_container()) - run_on(static_cast(n)); - } - prev_node = n; - } -} - -void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) { - - alu_node *last = NULL; - alu_group_node *prev_g = NULL; - bool add_nop = false; - if (prev_node && prev_node->is_alu_group()) { - prev_g = static_cast(prev_node); - } - - for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { - alu_node *n = static_cast(*I); - unsigned slot = n->bc.slot; - value *d = n->dst.empty() ? NULL : n->dst[0]; - - if (d && d->is_special_reg()) { - assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access() || d->is_scratch()); - d = NULL; - } - - sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0); - - if (d) { - assert(fdst.chan() == slot || slot == SLOT_TRANS); - } - - if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman())) - n->bc.dst_gpr = fdst.sel(); - n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? 
slot : 0; - - - if (d && d->is_rel() && d->rel && !d->rel->is_const()) { - n->bc.dst_rel = 1; - update_ngpr(d->array->gpr.sel() + d->array->array_size -1); - } else { - n->bc.dst_rel = 0; - } - - n->bc.write_mask = d != NULL; - n->bc.last = 0; - - if (n->bc.op_ptr->flags & AF_PRED) { - n->bc.update_pred = (n->dst[1] != NULL); - n->bc.update_exec_mask = (n->dst[2] != NULL); - } - - // FIXME handle predication here - n->bc.pred_sel = PRED_SEL_OFF; - - update_ngpr(n->bc.dst_gpr); - - add_nop |= finalize_alu_src(g, n, prev_g); - - last = n; - } - - if (add_nop) { - if (sh.get_ctx().r6xx_gpr_index_workaround) { - insert_rv6xx_load_ar_workaround(g); - } - } - last->bc.last = 1; -} - -bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) { - vvec &sv = a->src; - bool add_nop = false; - FBC_DUMP( - sblog << "finalize_alu_src: "; - dump::dump_op(a); - sblog << "\n"; - ); - - unsigned si = 0; - - for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) { - value *v = *I; - assert(v); - - bc_alu_src &src = a->bc.src[si]; - sel_chan sc; - src.rel = 0; - - sel_chan gpr; - - switch (v->kind) { - case VLK_REL_REG: - sc = v->get_final_gpr(); - src.sel = sc.sel(); - src.chan = sc.chan(); - if (!v->rel->is_const()) { - src.rel = 1; - update_ngpr(v->array->gpr.sel() + v->array->array_size -1); - if (prev && !add_nop) { - for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) { - alu_node *pn = static_cast(*pI); - if (pn->bc.dst_gpr == src.sel) { - add_nop = true; - break; - } - } - } - } else - src.rel = 0; - - break; - case VLK_REG: - gpr = v->get_final_gpr(); - src.sel = gpr.sel(); - src.chan = gpr.chan(); - update_ngpr(src.sel); - break; - case VLK_TEMP: - src.sel = v->gpr.sel(); - src.chan = v->gpr.chan(); - update_ngpr(src.sel); - break; - case VLK_UNDEF: - case VLK_CONST: { - literal lv = v->literal_value; - src.chan = 0; - - if (lv == literal(0)) - src.sel = ALU_SRC_0; - else if (lv == literal(0.5f)) - src.sel = 
ALU_SRC_0_5; - else if (lv == literal(1.0f)) - src.sel = ALU_SRC_1; - else if (lv == literal(1)) - src.sel = ALU_SRC_1_INT; - else if (lv == literal(-1)) - src.sel = ALU_SRC_M_1_INT; - else { - src.sel = ALU_SRC_LITERAL; - src.chan = g->literal_chan(lv); - src.value = lv; - } - break; - } - case VLK_KCACHE: { - cf_node *clause = static_cast(g->parent); - assert(clause->is_alu_clause()); - sel_chan k = translate_kcache(clause, v); - - assert(k && "kcache translation failed"); - - src.sel = k.sel(); - src.chan = k.chan(); - break; - } - case VLK_SPECIAL_REG: - if (v->select.sel() == SV_LDS_OQA) { - src.sel = ALU_SRC_LDS_OQ_A_POP; - src.chan = 0; - } else if (v->select.sel() == SV_LDS_OQB) { - src.sel = ALU_SRC_LDS_OQ_B_POP; - src.chan = 0; - } else { - src.sel = ALU_SRC_0; - src.chan = 0; - } - break; - case VLK_PARAM: - case VLK_SPECIAL_CONST: - src.sel = v->select.sel(); - src.chan = v->select.chan(); - break; - default: - assert(!"unknown value kind"); - break; - } - if (prev && !add_nop) { - for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) { - alu_node *pn = static_cast(*pI); - if (pn->bc.dst_rel) { - if (pn->bc.dst_gpr == src.sel) { - add_nop = true; - break; - } - } - } - } - } - - while (si < 3) { - a->bc.src[si++].sel = 0; - } - return add_nop; -} - -void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start) -{ - int reg = -1; - - for (unsigned chan = 0; chan < 4; ++chan) { - - dst.bc.dst_sel[chan] = SEL_MASK; - - unsigned sel = SEL_MASK; - - value *v = src.src[arg_start + chan]; - - if (!v || v->is_undef()) { - sel = SEL_MASK; - } else if (v->is_const()) { - literal l = v->literal_value; - if (l == literal(0)) - sel = SEL_0; - else if (l == literal(1.0f)) - sel = SEL_1; - else { - sblog << "invalid fetch constant operand " << chan << " "; - dump::dump_op(&src); - sblog << "\n"; - abort(); - } - - } else if (v->is_any_gpr()) { - unsigned vreg = v->gpr.sel(); - unsigned vchan = v->gpr.chan(); - - if (reg == 
-1) - reg = vreg; - else if ((unsigned)reg != vreg) { - sblog << "invalid fetch source operand " << chan << " "; - dump::dump_op(&src); - sblog << "\n"; - abort(); - } - - sel = vchan; - - } else { - sblog << "invalid fetch source operand " << chan << " "; - dump::dump_op(&src); - sblog << "\n"; - abort(); - } - - dst.bc.src_sel[chan] = sel; - } - - if (reg >= 0) - update_ngpr(reg); - - dst.bc.src_gpr = reg >= 0 ? reg : 0; -} - -void bc_finalizer::emit_set_grad(fetch_node* f) { - - assert(f->src.size() == 12 || f->src.size() == 13); - unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H }; - - unsigned arg_start = 0; - - for (unsigned op = 0; op < 2; ++op) { - fetch_node *n = sh.create_fetch(); - n->bc.set_op(ops[op]); - - arg_start += 4; - - copy_fetch_src(*n, *f, arg_start); - - f->insert_before(n); - } - -} - -void bc_finalizer::emit_set_texture_offsets(fetch_node &f) { - assert(f.src.size() == 8); - - fetch_node *n = sh.create_fetch(); - - n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS); - - copy_fetch_src(*n, f, 4); - - f.insert_before(n); -} - -void bc_finalizer::finalize_fetch(fetch_node* f) { - - int reg = -1; - - // src - - unsigned src_count = 4; - - unsigned flags = f->bc.op_ptr->flags; - - if (flags & FF_VTX) { - src_count = 1; - } else if (flags & FF_GDS) { - src_count = 2; - } else if (flags & FF_USEGRAD) { - emit_set_grad(f); - } else if (flags & FF_USE_TEXTURE_OFFSETS) { - emit_set_texture_offsets(*f); - } - - for (unsigned chan = 0; chan < src_count; ++chan) { - - unsigned sel = f->bc.src_sel[chan]; - - if (sel > SEL_W) - continue; - - value *v = f->src[chan]; - - if (v->is_undef()) { - sel = SEL_MASK; - } else if (v->is_const()) { - literal l = v->literal_value; - if (l == literal(0)) - sel = SEL_0; - else if (l == literal(1.0f)) - sel = SEL_1; - else { - sblog << "invalid fetch constant operand " << chan << " "; - dump::dump_op(f); - sblog << "\n"; - abort(); - } - - } else if (v->is_any_gpr()) { - unsigned vreg = v->gpr.sel(); - 
unsigned vchan = v->gpr.chan(); - - if (reg == -1) - reg = vreg; - else if ((unsigned)reg != vreg) { - sblog << "invalid fetch source operand " << chan << " "; - dump::dump_op(f); - sblog << "\n"; - abort(); - } - - sel = vchan; - - } else { - sblog << "invalid fetch source operand " << chan << " "; - dump::dump_op(f); - sblog << "\n"; - abort(); - } - - f->bc.src_sel[chan] = sel; - } - - if (reg >= 0) - update_ngpr(reg); - - f->bc.src_gpr = reg >= 0 ? reg : 0; - - // dst - - reg = -1; - - unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK}; - - for (unsigned chan = 0; chan < 4; ++chan) { - - unsigned sel = f->bc.dst_sel[chan]; - - if (sel == SEL_MASK) - continue; - - value *v = f->dst[chan]; - if (!v) - continue; - - if (v->is_any_gpr()) { - unsigned vreg = v->gpr.sel(); - unsigned vchan = v->gpr.chan(); - - if (reg == -1) - reg = vreg; - else if ((unsigned)reg != vreg) { - sblog << "invalid fetch dst operand " << chan << " "; - dump::dump_op(f); - sblog << "\n"; - abort(); - } - - dst_swz[vchan] = sel; - - } else { - sblog << "invalid fetch dst operand " << chan << " "; - dump::dump_op(f); - sblog << "\n"; - abort(); - } - - } - - for (unsigned i = 0; i < 4; ++i) - f->bc.dst_sel[i] = dst_swz[i]; - - if ((flags & FF_GDS) && reg == -1) { - f->bc.dst_sel[0] = SEL_MASK; - f->bc.dst_gpr = 0; - return ; - } - assert(reg >= 0); - - if (reg >= 0) - update_ngpr(reg); - - f->bc.dst_gpr = reg >= 0 ? 
reg : 0; -} - -void bc_finalizer::finalize_cf(cf_node* c) { - - unsigned flags = c->bc.op_ptr->flags; - - c->bc.end_of_program = 0; - last_cf = c; - - if (flags & CF_EXP) { - c->bc.set_op(CF_OP_EXPORT); - last_export[c->bc.type] = c; - - int reg = -1; - - for (unsigned chan = 0; chan < 4; ++chan) { - - unsigned sel = c->bc.sel[chan]; - - if (sel > SEL_W) - continue; - - value *v = c->src[chan]; - - if (v->is_undef()) { - sel = SEL_MASK; - } else if (v->is_const()) { - literal l = v->literal_value; - if (l == literal(0)) - sel = SEL_0; - else if (l == literal(1.0f)) - sel = SEL_1; - else { - sblog << "invalid export constant operand " << chan << " "; - dump::dump_op(c); - sblog << "\n"; - abort(); - } - - } else if (v->is_any_gpr()) { - unsigned vreg = v->gpr.sel(); - unsigned vchan = v->gpr.chan(); - - if (reg == -1) - reg = vreg; - else if ((unsigned)reg != vreg) { - sblog << "invalid export source operand " << chan << " "; - dump::dump_op(c); - sblog << "\n"; - abort(); - } - - sel = vchan; - - } else { - sblog << "invalid export source operand " << chan << " "; - dump::dump_op(c); - sblog << "\n"; - abort(); - } - - c->bc.sel[chan] = sel; - } - - if (reg >= 0) - update_ngpr(reg); - - c->bc.rw_gpr = reg >= 0 ? 
reg : 0; - - } else if (flags & CF_MEM) { - - int reg = -1; - unsigned mask = 0; - - - for (unsigned chan = 0; chan < 4; ++chan) { - value *v; - if (ctx.hw_class == HW_CLASS_R600 && c->bc.op == CF_OP_MEM_SCRATCH && - (c->bc.type == 2 || c->bc.type == 3)) - v = c->dst[chan]; - else - v = c->src[chan]; - - if (!v || v->is_undef()) - continue; - - if (!v->is_any_gpr() || v->gpr.chan() != chan) { - sblog << "invalid source operand " << chan << " "; - dump::dump_op(c); - sblog << "\n"; - abort(); - } - unsigned vreg = v->gpr.sel(); - if (reg == -1) - reg = vreg; - else if ((unsigned)reg != vreg) { - sblog << "invalid source operand " << chan << " "; - dump::dump_op(c); - sblog << "\n"; - abort(); - } - - mask |= (1 << chan); - } - - if (reg >= 0) - update_ngpr(reg); - - c->bc.rw_gpr = reg >= 0 ? reg : 0; - c->bc.comp_mask = mask; - - if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { - - reg = -1; - - for (unsigned chan = 0; chan < 4; ++chan) { - value *v = c->src[4 + chan]; - if (!v || v->is_undef()) - continue; - - if (!v->is_any_gpr() || v->gpr.chan() != chan) { - sblog << "invalid source operand " << chan << " "; - dump::dump_op(c); - sblog << "\n"; - abort(); - } - unsigned vreg = v->gpr.sel(); - if (reg == -1) - reg = vreg; - else if ((unsigned)reg != vreg) { - sblog << "invalid source operand " << chan << " "; - dump::dump_op(c); - sblog << "\n"; - abort(); - } - } - - assert(reg >= 0); - - if (reg >= 0) - update_ngpr(reg); - - c->bc.index_gpr = reg >= 0 ? reg : 0; - } - } else if (flags & CF_CALL) { - update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 
2 : 1); - } -} - -sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) { - unsigned sel = v->select.kcache_sel(); - unsigned bank = v->select.kcache_bank(); - unsigned chan = v->select.chan(); - static const unsigned kc_base[] = {128, 160, 256, 288}; - - sel &= 4095; - - unsigned line = sel >> 4; - - for (unsigned k = 0; k < 4; ++k) { - bc_kcache &kc = alu->bc.kc[k]; - - if (kc.mode == KC_LOCK_NONE) - break; - - if (kc.bank == bank && (kc.addr == line || - (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) { - - sel = kc_base[k] + (sel - (kc.addr << 4)); - - return sel_chan(sel, chan); - } - } - - assert(!"kcache translation error"); - return 0; -} - -void bc_finalizer::update_ngpr(unsigned gpr) { - if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr) - ngpr = gpr + 1; -} - -unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops, - unsigned &ifs, unsigned add) { - unsigned stack_elements = add; - bool has_non_wqm_push = (add != 0); - region_node *r = n->is_region() ? - static_cast(n) : n->get_parent_region(); - - loops = 0; - ifs = 0; - - while (r) { - if (r->is_loop()) { - ++loops; - } else { - ++ifs; - has_non_wqm_push = true; - } - r = r->get_parent_region(); - } - stack_elements += (loops * ctx.stack_entry_size) + ifs; - - // reserve additional elements in some cases - switch (ctx.hw_class) { - case HW_CLASS_R600: - case HW_CLASS_R700: - // If any non-WQM push is invoked, 2 elements should be reserved. 
- if (has_non_wqm_push) - stack_elements += 2; - break; - case HW_CLASS_CAYMAN: - // If any stack operation is invoked, 2 elements should be reserved - if (stack_elements) - stack_elements += 2; - break; - case HW_CLASS_EVERGREEN: - // According to the docs we need to reserve 1 element for each of the - // following cases: - // 1) non-WQM push is used with WQM/LOOP frames on stack - // 2) ALU_ELSE_AFTER is used at the point of max stack usage - // NOTE: - // It was found that the conditions above are not sufficient, there are - // other cases where we also need to reserve stack space, that's why - // we always reserve 1 stack element if we have non-WQM push on stack. - // Condition 2 is ignored for now because we don't use this instruction. - if (has_non_wqm_push) - ++stack_elements; - break; - case HW_CLASS_UNKNOWN: - assert(0); - } - return stack_elements; -} - -void bc_finalizer::update_nstack(region_node* r, unsigned add) { - unsigned loops = 0; - unsigned ifs = 0; - unsigned elems = r ? 
get_stack_depth(r, loops, ifs, add) : add; - - // XXX all chips expect this value to be computed using 4 as entry size, - // not the real entry size - unsigned stack_entries = (elems + 3) >> 2; - - if (nstack < stack_entries) - nstack = stack_entries; -} - -void bc_finalizer::cf_peephole() { - if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) { - for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E; - I = N) { - N = I; ++N; - cf_node *c = static_cast(*I); - - if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && - (c->flags & NF_ALU_STACK_WORKAROUND)) { - cf_node *push = sh.create_cf(CF_OP_PUSH); - c->insert_before(push); - push->jump(c); - c->bc.set_op(CF_OP_ALU); - } - } - } - - for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E; - I = N) { - N = I; ++N; - - cf_node *c = static_cast(*I); - - if (c->jump_after_target) { - if (c->jump_target->next == NULL) { - c->jump_target->insert_after(sh.create_cf(CF_OP_NOP)); - if (last_cf == c->jump_target) - last_cf = static_cast(c->jump_target->next); - } - c->jump_target = static_cast(c->jump_target->next); - c->jump_after_target = false; - } - - if (c->is_cf_op(CF_OP_POP)) { - node *p = c->prev; - if (p->is_alu_clause()) { - cf_node *a = static_cast(p); - - if (a->bc.op == CF_OP_ALU) { - a->bc.set_op(CF_OP_ALU_POP_AFTER); - c->remove(); - } - } - } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) { - // if JUMP is immediately followed by its jump target, - // then JUMP is useless and we can eliminate it - c->remove(); - } - } -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc b/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc deleted file mode 100644 index c6b9ddb..0000000 --- a/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc +++ /dev/null @@ -1,607 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the 
"Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -// TODO add all formats - -// CF - -BC_FORMAT_BEGIN_HW(CF_WORD0, R6R7) -BC_FIELD(CF_WORD0, ADDR, ADDR, 31, 0) -BC_FORMAT_END(CF_WORD0) - -BC_FORMAT_BEGIN_HW(CF_WORD0, EGCM) -BC_FIELD(CF_WORD0, ADDR, ADDR, 23, 0) -BC_FIELD(CF_WORD0, JUMPTABLE_SEL, JTS, 26, 24) -BC_RSRVD(CF_WORD0, 31, 27) -BC_FORMAT_END(CF_WORD0) - - -BC_FORMAT_BEGIN_HW(CF_GWS_WORD0, EGCM) -BC_FIELD(CF_GWS_WORD0, VALUE, VAL, 9, 0) -BC_RSRVD(CF_GWS_WORD0, 15, 10) -BC_FIELD(CF_GWS_WORD0, RESOURCE, RSRC, 20, 16) -BC_RSRVD(CF_GWS_WORD0, 24, 21) -BC_FIELD(CF_GWS_WORD0, SIGN, SIGN, 25, 25) -BC_FIELD(CF_GWS_WORD0, VAL_INDEX_MODE, VIM, 27, 26) -BC_FIELD(CF_GWS_WORD0, RSRC_INDEX_MODE, RIM, 29, 28) -BC_FIELD(CF_GWS_WORD0, GWS_OPCODE, GWS_OP, 31, 30) -BC_FORMAT_END(CF_GWS_WORD0) - - -BC_FORMAT_BEGIN_HW(CF_WORD1, R6R7) -BC_FIELD(CF_WORD1, POP_COUNT, PC, 2, 0) -BC_FIELD(CF_WORD1, CF_CONST, CF_CONST, 7, 3) -BC_FIELD(CF_WORD1, COND, COND, 9, 8) -BC_FIELD(CF_WORD1, COUNT, COUNT, 12, 10) 
-BC_FIELD(CF_WORD1, CALL_COUNT, CALL_CNT, 18, 13) -BC_FIELD(CF_WORD1, COUNT_3, COUNT_3, 19, 19) -BC_RSRVD(CF_WORD1, 20, 20) -BC_FIELD(CF_WORD1, END_OF_PROGRAM, EOP, 21, 21) -BC_FIELD(CF_WORD1, VALID_PIXEL_MODE, VPM, 22, 22) -BC_FIELD(CF_WORD1, CF_INST, CF_INST, 29, 23) -BC_FIELD(CF_WORD1, WHOLE_QUAD_MODE, WQM, 30, 30) -BC_FIELD(CF_WORD1, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_WORD1) - -BC_FORMAT_BEGIN_HW(CF_WORD1, EG) -BC_FIELD(CF_WORD1, POP_COUNT, PC, 2, 0) -BC_FIELD(CF_WORD1, CF_CONST, CF_CONST, 7, 3) -BC_FIELD(CF_WORD1, COND, COND, 9, 8) -BC_FIELD(CF_WORD1, COUNT, COUNT, 15, 10) -BC_RSRVD(CF_WORD1, 19, 16) -BC_FIELD(CF_WORD1, VALID_PIXEL_MODE, VPM, 20, 20) -BC_FIELD(CF_WORD1, END_OF_PROGRAM, EOP, 21, 21) -BC_FIELD(CF_WORD1, CF_INST, CF_INST, 29, 22) -BC_FIELD(CF_WORD1, WHOLE_QUAD_MODE, WQM, 30, 30) -BC_FIELD(CF_WORD1, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_WORD1) - -BC_FORMAT_BEGIN_HW(CF_WORD1, CM) -BC_FIELD(CF_WORD1, POP_COUNT, PC, 2, 0) -BC_FIELD(CF_WORD1, CF_CONST, CF_CONST, 7, 3) -BC_FIELD(CF_WORD1, COND, COND, 9, 8) -BC_FIELD(CF_WORD1, COUNT, COUNT, 15, 10) -BC_RSRVD(CF_WORD1, 19, 16) -BC_FIELD(CF_WORD1, VALID_PIXEL_MODE, VPM, 20, 20) -BC_RSRVD(CF_WORD1, 21, 21) -BC_FIELD(CF_WORD1, CF_INST, CF_INST, 29, 22) -BC_RSRVD(CF_WORD1, 30, 30) -BC_FIELD(CF_WORD1, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_WORD1) - - -BC_FORMAT_BEGIN(CF_ALU_WORD0) -BC_FIELD(CF_ALU_WORD0, ADDR, ADDR, 21, 0) -BC_FIELD(CF_ALU_WORD0, KCACHE_BANK0, KB0, 25, 22) -BC_FIELD(CF_ALU_WORD0, KCACHE_BANK1, KB1, 29, 26) -BC_FIELD(CF_ALU_WORD0, KCACHE_MODE0, KM0, 31, 30) -BC_FORMAT_END(CF_ALU_WORD0) - -BC_FORMAT_BEGIN_HW(CF_ALU_WORD1, R6) -BC_FIELD(CF_ALU_WORD1, KCACHE_MODE1, KM1, 1, 0) -BC_FIELD(CF_ALU_WORD1, KCACHE_ADDR0, KA0, 9, 2) -BC_FIELD(CF_ALU_WORD1, KCACHE_ADDR1, KA1, 17, 10) -BC_FIELD(CF_ALU_WORD1, COUNT, COUNT, 24, 18) -BC_FIELD(CF_ALU_WORD1, USES_WATERFALL, UW, 25, 25) -BC_FIELD(CF_ALU_WORD1, CF_INST, CF_INST, 29, 26) -BC_FIELD(CF_ALU_WORD1, WHOLE_QUAD_MODE, WQM, 30, 30) 
-BC_FIELD(CF_ALU_WORD1, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_ALU_WORD1) - -BC_FORMAT_BEGIN_HW(CF_ALU_WORD1, R7EGCM) -BC_FIELD(CF_ALU_WORD1, KCACHE_MODE1, KM1, 1, 0) -BC_FIELD(CF_ALU_WORD1, KCACHE_ADDR0, KA0, 9, 2) -BC_FIELD(CF_ALU_WORD1, KCACHE_ADDR1, KA1, 17, 10) -BC_FIELD(CF_ALU_WORD1, COUNT, COUNT, 24, 18) -BC_FIELD(CF_ALU_WORD1, ALT_CONST, ALT_C, 25, 25) -BC_FIELD(CF_ALU_WORD1, CF_INST, CF_INST, 29, 26) -BC_FIELD(CF_ALU_WORD1, WHOLE_QUAD_MODE, WQM, 30, 30) -BC_FIELD(CF_ALU_WORD1, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_ALU_WORD1) - - -BC_FORMAT_BEGIN_HW(CF_ALU_WORD0_EXT, EGCM) -BC_RSRVD(CF_ALU_WORD0_EXT, 3, 0) -BC_FIELD(CF_ALU_WORD0_EXT, KCACHE_BANK_INDEX_MODE0, KBIM0, 5, 4) -BC_FIELD(CF_ALU_WORD0_EXT, KCACHE_BANK_INDEX_MODE1, KBIM1, 7, 6) -BC_FIELD(CF_ALU_WORD0_EXT, KCACHE_BANK_INDEX_MODE2, KBIM2, 9, 8) -BC_FIELD(CF_ALU_WORD0_EXT, KCACHE_BANK_INDEX_MODE3, KBIM3, 11, 10) -BC_RSRVD(CF_ALU_WORD0_EXT, 21, 12) -BC_FIELD(CF_ALU_WORD0_EXT, KCACHE_BANK2, KB2, 25, 22) -BC_FIELD(CF_ALU_WORD0_EXT, KCACHE_BANK3, KB3, 29, 26) -BC_FIELD(CF_ALU_WORD0_EXT, KCACHE_MODE2, KM2, 31, 30) -BC_FORMAT_END(CF_ALU_WORD0_EXT) - -BC_FORMAT_BEGIN_HW(CF_ALU_WORD1_EXT, EGCM) -BC_FIELD(CF_ALU_WORD1_EXT, KCACHE_MODE3, KM3, 1, 0) -BC_FIELD(CF_ALU_WORD1_EXT, KCACHE_ADDR2, KA2, 9, 2) -BC_FIELD(CF_ALU_WORD1_EXT, KCACHE_ADDR3, KA3, 17, 10) -BC_RSRVD(CF_ALU_WORD1_EXT, 25, 18) -BC_FIELD(CF_ALU_WORD1_EXT, CF_INST, CF_INST, 29, 26) -BC_RSRVD(CF_ALU_WORD1_EXT, 30, 30) -BC_FIELD(CF_ALU_WORD1_EXT, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_ALU_WORD1_EXT) - - -BC_FORMAT_BEGIN(CF_ALLOC_EXPORT_WORD0) -BC_FIELD(CF_ALLOC_EXPORT_WORD0, ARRAY_BASE, ARR_BS, 12, 0) -BC_FIELD(CF_ALLOC_EXPORT_WORD0, TYPE, TYPE, 14, 13) -BC_FIELD(CF_ALLOC_EXPORT_WORD0, RW_GPR, RW_GPR, 21, 15) -BC_FIELD(CF_ALLOC_EXPORT_WORD0, RW_REL, RW_REL, 22, 22) -BC_FIELD(CF_ALLOC_EXPORT_WORD0, INDEX_GPR, IND_GPR, 29, 23) -BC_FIELD(CF_ALLOC_EXPORT_WORD0, ELEM_SIZE, ES, 31, 30) -BC_FORMAT_END(CF_ALLOC_EXPORT_WORD0) - 
-BC_FORMAT_BEGIN_HW(CF_ALLOC_EXPORT_WORD0_RAT, EGCM) -BC_FIELD(CF_ALLOC_EXPORT_WORD0_RAT, RAT_ID, R_ID, 3, 0) -BC_FIELD(CF_ALLOC_EXPORT_WORD0_RAT, RAT_INST, R_INST, 9, 4) -BC_RSRVD(CF_ALLOC_EXPORT_WORD0_RAT, 10, 10) -BC_FIELD(CF_ALLOC_EXPORT_WORD0_RAT, RAT_INDEX_MODE, RIM, 12, 11) -BC_FIELD(CF_ALLOC_EXPORT_WORD0_RAT, TYPE, TYPE, 14, 13) -BC_FIELD(CF_ALLOC_EXPORT_WORD0_RAT, RW_GPR, RW_GPR, 21, 15) -BC_FIELD(CF_ALLOC_EXPORT_WORD0_RAT, RW_REL, RW_REL, 22, 22) -BC_FIELD(CF_ALLOC_EXPORT_WORD0_RAT, INDEX_GPR, IND_GPR, 29, 23) -BC_FIELD(CF_ALLOC_EXPORT_WORD0_RAT, ELEM_SIZE, ES, 31, 30) -BC_FORMAT_END(CF_ALLOC_EXPORT_WORD0_RAT) - -BC_FORMAT_BEGIN_HW(CF_ALLOC_EXPORT_WORD1_BUF, R6R7) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, ARR_SIZE, ARR_SZ, 11, 0) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, COMP_MASK, MASK, 15, 12) -BC_RSRVD(CF_ALLOC_EXPORT_WORD1_BUF, 16, 16) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, BURST_COUNT, BURST, 20, 17) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, END_OF_PROGRAM, EOP, 21, 21) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, VALID_PIXEL_MODE, VPM, 22, 22) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, CF_INST, CF_INST, 29, 23) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, WHOLE_QUAD_MODE, WQM, 30, 30) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_ALLOC_EXPORT_WORD1_BUF) - -BC_FORMAT_BEGIN_HW(CF_ALLOC_EXPORT_WORD1_BUF, EG) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, ARR_SIZE, ARR_SZ, 11, 0) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, COMP_MASK, MASK, 15, 12) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, BURST_COUNT, BURST, 19, 16) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, VALID_PIXEL_MODE, VPM, 20, 20) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, END_OF_PROGRAM, EOP, 21, 21) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, CF_INST, CF_INST, 29, 22) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, MARK, MARK, 30, 30) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_ALLOC_EXPORT_WORD1_BUF) - -BC_FORMAT_BEGIN_HW(CF_ALLOC_EXPORT_WORD1_BUF, CM) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, ARR_SIZE, ARR_SZ, 11, 0) 
-BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, COMP_MASK, MASK, 15, 12) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, BURST_COUNT, BURST, 19, 16) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, VALID_PIXEL_MODE, VPM, 20, 20) -BC_RSRVD(CF_ALLOC_EXPORT_WORD1_BUF, 21, 21) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, CF_INST, CF_INST, 29, 22) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, MARK, MARK, 30, 30) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_BUF, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_ALLOC_EXPORT_WORD1_BUF) - -BC_FORMAT_BEGIN_HW(CF_ALLOC_EXPORT_WORD1_SWIZ, R6R7) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_X, SEL_X, 2, 0) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_Y, SEL_Y, 5, 3) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_Z, SEL_Z, 8, 6) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_W, SEL_W, 11, 9) -BC_RSRVD(CF_ALLOC_EXPORT_WORD1_SWIZ, 16, 12) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, BURST_COUNT, BURST, 20, 17) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, END_OF_PROGRAM, EOP, 21, 21) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, VALID_PIXEL_MODE, VPM, 22, 22) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, CF_INST, CF_INST, 29, 23) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, WHOLE_QUAD_MODE, WQM, 30, 30) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_ALLOC_EXPORT_WORD1_SWIZ) - -BC_FORMAT_BEGIN_HW(CF_ALLOC_EXPORT_WORD1_SWIZ, EG) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_X, SEL_X, 2, 0) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_Y, SEL_Y, 5, 3) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_Z, SEL_Z, 8, 6) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_W, SEL_W, 11, 9) -BC_RSRVD(CF_ALLOC_EXPORT_WORD1_SWIZ, 15, 12) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, BURST_COUNT, BURST, 19, 16) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, VALID_PIXEL_MODE, VPM, 20, 20) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, END_OF_PROGRAM, EOP, 21, 21) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, CF_INST, CF_INST, 29, 22) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, MARK, M, 30, 30) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_ALLOC_EXPORT_WORD1_SWIZ) - 
-BC_FORMAT_BEGIN_HW(CF_ALLOC_EXPORT_WORD1_SWIZ, CM) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_X, SEL_X, 2, 0) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_Y, SEL_Y, 5, 3) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_Z, SEL_Z, 8, 6) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, SEL_W, SEL_W, 11, 9) -BC_RSRVD(CF_ALLOC_EXPORT_WORD1_SWIZ, 15, 12) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, BURST_COUNT, BURST, 19, 16) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, VALID_PIXEL_MODE, VPM, 20, 20) -BC_RSRVD(CF_ALLOC_EXPORT_WORD1_SWIZ, 21, 21) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, CF_INST, CF_INST, 29, 22) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, MARK, M, 30, 30) -BC_FIELD(CF_ALLOC_EXPORT_WORD1_SWIZ, BARRIER, B, 31, 31) -BC_FORMAT_END(CF_ALLOC_EXPORT_WORD1_SWIZ) - -// ALU - -BC_FORMAT_BEGIN(ALU_WORD0) -BC_FIELD(ALU_WORD0, SRC0_SEL, S0S, 8, 0) -BC_FIELD(ALU_WORD0, SRC0_REL, S0R, 9, 9) -BC_FIELD(ALU_WORD0, SRC0_CHAN, S0C, 11, 10) -BC_FIELD(ALU_WORD0, SRC0_NEG, S0N, 12, 12) -BC_FIELD(ALU_WORD0, SRC1_SEL, S1S, 21, 13) -BC_FIELD(ALU_WORD0, SRC1_REL, S1R, 22, 22) -BC_FIELD(ALU_WORD0, SRC1_CHAN, S1C, 24, 23) -BC_FIELD(ALU_WORD0, SRC1_NEG, S1N, 25, 25) -BC_FIELD(ALU_WORD0, INDEX_MODE, IM, 28, 26) -BC_FIELD(ALU_WORD0, PRED_SEL, PS, 30, 29) -BC_FIELD(ALU_WORD0, LAST, L, 31, 31) -BC_FORMAT_END(ALU_WORD0) - -BC_FORMAT_BEGIN_HW(ALU_WORD1_OP2, R6) -BC_FIELD(ALU_WORD1_OP2, SRC0_ABS, S0A, 0, 0) -BC_FIELD(ALU_WORD1_OP2, SRC1_ABS, S1A, 1, 1) -BC_FIELD(ALU_WORD1_OP2, UPDATE_EXEC_MASK, UEM, 2, 2) -BC_FIELD(ALU_WORD1_OP2, UPDATE_PRED, UP, 3, 3) -BC_FIELD(ALU_WORD1_OP2, WRITE_MASK, WM, 4, 4) -BC_FIELD(ALU_WORD1_OP2, FOG_MERGE, FM, 5, 5) -BC_FIELD(ALU_WORD1_OP2, OMOD, OMOD, 7, 6) -BC_FIELD(ALU_WORD1_OP2, ALU_INST, INST, 17, 8) -BC_FIELD(ALU_WORD1_OP2, BANK_SWIZZLE, BS, 20, 18) -BC_FIELD(ALU_WORD1_OP2, DST_GPR, DGPR, 27, 21) -BC_FIELD(ALU_WORD1_OP2, DST_REL, DR, 28, 28) -BC_FIELD(ALU_WORD1_OP2, DST_CHAN, DC, 30, 29) -BC_FIELD(ALU_WORD1_OP2, CLAMP, C, 31, 31) -BC_FORMAT_END(ALU_WORD1_OP2) - -BC_FORMAT_BEGIN_HW(ALU_WORD1_OP2, 
R7EGCM) -BC_FIELD(ALU_WORD1_OP2, SRC0_ABS, S0A, 0, 0) -BC_FIELD(ALU_WORD1_OP2, SRC1_ABS, S1A, 1, 1) -BC_FIELD(ALU_WORD1_OP2, UPDATE_EXEC_MASK, UEM, 2, 2) -BC_FIELD(ALU_WORD1_OP2, UPDATE_PRED, UP, 3, 3) -BC_FIELD(ALU_WORD1_OP2, WRITE_MASK, WM, 4, 4) -BC_FIELD(ALU_WORD1_OP2, OMOD, OMOD, 6, 5) -BC_FIELD(ALU_WORD1_OP2, ALU_INST, INST, 17, 7) -BC_FIELD(ALU_WORD1_OP2, BANK_SWIZZLE, BS, 20, 18) -BC_FIELD(ALU_WORD1_OP2, DST_GPR, DGPR, 27, 21) -BC_FIELD(ALU_WORD1_OP2, DST_REL, DR, 28, 28) -BC_FIELD(ALU_WORD1_OP2, DST_CHAN, DC, 30, 29) -BC_FIELD(ALU_WORD1_OP2, CLAMP, C, 31, 31) -BC_FORMAT_END(ALU_WORD1_OP2) - -BC_FORMAT_BEGIN_HW(ALU_WORD1_OP2_MOVA, CM) -BC_FIELD(ALU_WORD1_OP2_MOVA, SRC0_ABS, S0A, 0, 0) -BC_FIELD(ALU_WORD1_OP2_MOVA, SRC1_ABS, S1A, 1, 1) -BC_FIELD(ALU_WORD1_OP2_MOVA, UPDATE_EXEC_MASK, UEM, 2, 2) -BC_FIELD(ALU_WORD1_OP2_MOVA, UPDATE_PRED, UP, 3, 3) -BC_FIELD(ALU_WORD1_OP2_MOVA, WRITE_MASK, WM, 4, 4) -BC_FIELD(ALU_WORD1_OP2_MOVA, OMOD, OMOD, 6, 5) -BC_FIELD(ALU_WORD1_OP2_MOVA, ALU_INST, INST, 17, 7) -BC_FIELD(ALU_WORD1_OP2_MOVA, BANK_SWIZZLE, BS, 20, 18) -BC_FIELD(ALU_WORD1_OP2_MOVA, MOVA_DST, MOVA_DST, 27, 21) -BC_FIELD(ALU_WORD1_OP2_MOVA, DST_REL, DR, 28, 28) -BC_FIELD(ALU_WORD1_OP2_MOVA, DST_CHAN, DC, 30, 29) -BC_FIELD(ALU_WORD1_OP2_MOVA, CLAMP, C, 31, 31) -BC_FORMAT_END(ALU_WORD1_OP2_MOVA) - -BC_FORMAT_BEGIN_HW(ALU_WORD1_OP2_EXEC_MASK, CM) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, SRC0_ABS, S0A, 0, 0) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, SRC1_ABS, S1A, 1, 1) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, UPDATE_EXEC_MASK, UEM, 2, 2) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, UPDATE_PRED, UP, 3, 3) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, WRITE_MASK, WM, 4, 4) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, EXECUTE_MASK_OP, EMO, 6, 5) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, ALU_INST, INST, 17, 7) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, BANK_SWIZZLE, BS, 20, 18) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, DST_GPR, DGPR, 27, 21) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, DST_REL, DR, 28, 28) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, 
DST_CHAN, DC, 30, 29) -BC_FIELD(ALU_WORD1_OP2_EXEC_MASK, CLAMP, C, 31, 31) -BC_FORMAT_END(ALU_WORD1_OP2_EXEC_MASK) - -BC_FORMAT_BEGIN(ALU_WORD1_OP3) -BC_FIELD(ALU_WORD1_OP3, SRC2_SEL, S2S, 8, 0) -BC_FIELD(ALU_WORD1_OP3, SRC2_REL, S2R, 9, 9) -BC_FIELD(ALU_WORD1_OP3, SRC2_CHAN, S2C, 11, 10) -BC_FIELD(ALU_WORD1_OP3, SRC2_NEG, S2N, 12, 12) -BC_FIELD(ALU_WORD1_OP3, ALU_INST, INST, 17, 13) -BC_FIELD(ALU_WORD1_OP3, BANK_SWIZZLE, BS, 20, 18) -BC_FIELD(ALU_WORD1_OP3, DST_GPR, DGPR, 27, 21) -BC_FIELD(ALU_WORD1_OP3, DST_REL, DR, 28, 28) -BC_FIELD(ALU_WORD1_OP3, DST_CHAN, DC, 30, 29) -BC_FIELD(ALU_WORD1_OP3, CLAMP, C, 31, 31) -BC_FORMAT_END(ALU_WORD1_OP3) - - -BC_FORMAT_BEGIN_HW(ALU_WORD0_LDS_IDX_OP, EGCM) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, SRC0_SEL, S0S, 8, 0) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, SRC0_REL, S0R, 9, 9) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, SRC0_CHAN, S0C, 11, 10) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, IDX_OFFSET_4, IO4, 12, 12) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, SRC1_SEL, S1S, 21, 13) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, SRC1_REL, S1R, 22, 22) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, SRC1_CHAN, S1C, 24, 23) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, IDX_OFFSET_5, IO5, 25, 25) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, INDEX_MODE, IM, 28, 26) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, PRED_SEL, PS, 30, 29) -BC_FIELD(ALU_WORD0_LDS_IDX_OP, LAST, L, 31, 31) -BC_FORMAT_END(ALU_WORD0_LDS_IDX_OP) - -BC_FORMAT_BEGIN_HW(ALU_WORD1_LDS_IDX_OP, EGCM) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, SRC2_SEL, S2S, 8, 0) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, SRC2_REL, S2R, 9, 9) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, SRC2_CHAN, S2C, 11, 10) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, IDX_OFFSET_1, IO1, 12, 12) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, ALU_INST, INST, 17, 13) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, BANK_SWIZZLE, BS, 20, 18) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, LDS_OP, LDS_OP, 26, 21) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, IDX_OFFSET_0, IO0, 27, 27) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, IDX_OFFSET_2, IO2, 28, 28) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, DST_CHAN, DC, 30, 29) -BC_FIELD(ALU_WORD1_LDS_IDX_OP, 
IDX_OFFSET_3, IO3, 31, 31) -BC_FORMAT_END(ALU_WORD1_LDS_IDX_OP) - - -BC_FORMAT_BEGIN_HW(ALU_WORD1_LDS_DIRECT_LITERAL_LO, EGCM) -BC_FIELD(ALU_WORD1_LDS_DIRECT_LITERAL_LO, OFFSET_A, OFS_A, 12, 0) -BC_FIELD(ALU_WORD1_LDS_DIRECT_LITERAL_LO, STRIDE_A, STR_A, 19, 13) -BC_RSRVD(ALU_WORD1_LDS_DIRECT_LITERAL_LO, 21, 20) -BC_FIELD(ALU_WORD1_LDS_DIRECT_LITERAL_LO, THREAD_REL_A, THR_A, 22, 22) -BC_RSRVD(ALU_WORD1_LDS_DIRECT_LITERAL_LO, 31, 22) -BC_FORMAT_END(ALU_WORD1_LDS_DIRECT_LITERAL_LO) - -BC_FORMAT_BEGIN_HW(ALU_WORD1_LDS_DIRECT_LITERAL_HI, EGCM) -BC_FIELD(ALU_WORD1_LDS_DIRECT_LITERAL_HI, OFFSET_B, OFS_B, 12, 0) -BC_FIELD(ALU_WORD1_LDS_DIRECT_LITERAL_HI, STRIDE_B, STR_B, 19, 13) -BC_RSRVD(ALU_WORD1_LDS_DIRECT_LITERAL_HI, 21, 20) -BC_FIELD(ALU_WORD1_LDS_DIRECT_LITERAL_HI, THREAD_REL_B, THR_B, 22, 22) -BC_RSRVD(ALU_WORD1_LDS_DIRECT_LITERAL_HI, 30, 22) -BC_FIELD(ALU_WORD1_LDS_DIRECT_LITERAL_HI, DIRECT_READ_32, DR32, 31, 31) -BC_FORMAT_END(ALU_WORD1_LDS_DIRECT_LITERAL_HI) - - -// VTX - -BC_FORMAT_BEGIN_HW(VTX_WORD0, R6R7EG) -BC_FIELD(VTX_WORD0, VC_INST, INST, 4, 0) -BC_FIELD(VTX_WORD0, FETCH_TYPE, FT, 6, 5) -BC_FIELD(VTX_WORD0, FETCH_WHOLE_QUAD, FWQ, 7, 7) -BC_FIELD(VTX_WORD0, BUFFER_ID, BUF_ID, 15, 8) -BC_FIELD(VTX_WORD0, SRC_GPR, S_GPR, 22, 16) -BC_FIELD(VTX_WORD0, SRC_REL, SR, 23, 23) -BC_FIELD(VTX_WORD0, SRC_SEL_X, SSX, 25, 24) -BC_FIELD(VTX_WORD0, MEGA_FETCH_COUNT, MFC, 31, 26) -BC_FORMAT_END(VTX_WORD0) - -BC_FORMAT_BEGIN_HW(VTX_WORD0, CM) -BC_FIELD(VTX_WORD0, VC_INST, INST, 4, 0) -BC_FIELD(VTX_WORD0, FETCH_TYPE, FT, 6, 5) -BC_FIELD(VTX_WORD0, FETCH_WHOLE_QUAD, FWQ, 7, 7) -BC_FIELD(VTX_WORD0, BUFFER_ID, BUF_ID, 15, 8) -BC_FIELD(VTX_WORD0, SRC_GPR, S_GPR, 22, 16) -BC_FIELD(VTX_WORD0, SRC_REL, SR, 23, 23) -BC_FIELD(VTX_WORD0, SRC_SEL_X, SSX, 25, 24) -BC_FIELD(VTX_WORD0, SRC_SEL_Y, SSY, 27, 26) -BC_FIELD(VTX_WORD0, STRUCTURED_READ, SR, 29, 28) -BC_FIELD(VTX_WORD0, LDS_REQ, LR, 30, 30) -BC_FIELD(VTX_WORD0, COALESCED_READ, CR, 31, 31) -BC_FORMAT_END(VTX_WORD0) - - 
-BC_FORMAT_BEGIN(VTX_WORD1_GPR) -BC_FIELD(VTX_WORD1_GPR, DST_GPR, D_GPR, 6, 0) -BC_FIELD(VTX_WORD1_GPR, DST_REL, DR, 7, 7) -BC_RSRVD(VTX_WORD1_GPR, 8, 8) -BC_FIELD(VTX_WORD1_GPR, DST_SEL_X, DSX, 11, 9) -BC_FIELD(VTX_WORD1_GPR, DST_SEL_Y, DSY, 14, 12) -BC_FIELD(VTX_WORD1_GPR, DST_SEL_Z, DSZ, 17, 15) -BC_FIELD(VTX_WORD1_GPR, DST_SEL_W, DSW, 20, 18) -BC_FIELD(VTX_WORD1_GPR, USE_CONST_FIELDS, UCF, 21, 21) -BC_FIELD(VTX_WORD1_GPR, DATA_FORMAT, DFMT, 27, 22) -BC_FIELD(VTX_WORD1_GPR, NUM_FORMAT_ALL, NFA, 29, 28) -BC_FIELD(VTX_WORD1_GPR, FORMAT_COMP_ALL, FCA, 30, 30) -BC_FIELD(VTX_WORD1_GPR, SRF_MODE_ALL, SMA, 31, 31) -BC_FORMAT_END(VTX_WORD1_GPR) - -BC_FORMAT_BEGIN(VTX_WORD1_SEM) -BC_FIELD(VTX_WORD1_SEM, SEMANTIC_ID, SID, 7, 0) -BC_RSRVD(VTX_WORD1_SEM, 8, 8) -BC_FIELD(VTX_WORD1_SEM, DST_SEL_X, DSX, 11, 9) -BC_FIELD(VTX_WORD1_SEM, DST_SEL_Y, DSY, 14, 12) -BC_FIELD(VTX_WORD1_SEM, DST_SEL_Z, DSZ, 17, 15) -BC_FIELD(VTX_WORD1_SEM, DST_SEL_W, DSW, 20, 18) -BC_FIELD(VTX_WORD1_SEM, USE_CONST_FIELDS, UCF, 21, 21) -BC_FIELD(VTX_WORD1_SEM, DATA_FORMAT, DFMT, 27, 22) -BC_FIELD(VTX_WORD1_SEM, NUM_FORMAT_ALL, NFA, 29, 28) -BC_FIELD(VTX_WORD1_SEM, FORMAT_COMP_ALL, FCA, 30, 30) -BC_FIELD(VTX_WORD1_SEM, SRF_MODE_ALL, SMA, 31, 31) -BC_FORMAT_END(VTX_WORD1_SEM) - - -BC_FORMAT_BEGIN_HW(VTX_WORD2, R6) -BC_FIELD(VTX_WORD2, OFFSET, OFS, 15, 0) -BC_FIELD(VTX_WORD2, ENDIAN_SWAP, ES, 17, 16) -BC_FIELD(VTX_WORD2, CONST_BUF_NO_STRIDE, CBNS, 18, 18) -BC_FIELD(VTX_WORD2, MEGA_FETCH, MF, 19, 19) -BC_RSRVD(VTX_WORD2, 31, 20) -BC_FORMAT_END(VTX_WORD2) - -BC_FORMAT_BEGIN_HW(VTX_WORD2, R7) -BC_FIELD(VTX_WORD2, OFFSET, OFS, 15, 0) -BC_FIELD(VTX_WORD2, ENDIAN_SWAP, ES, 17, 16) -BC_FIELD(VTX_WORD2, CONST_BUF_NO_STRIDE, CBNS, 18, 18) -BC_FIELD(VTX_WORD2, MEGA_FETCH, MF, 19, 19) -BC_FIELD(VTX_WORD2, ALT_CONST, ALT_C, 20, 20) -BC_RSRVD(VTX_WORD2, 31, 21) -BC_FORMAT_END(VTX_WORD2) - -BC_FORMAT_BEGIN_HW(VTX_WORD2, EG) -BC_FIELD(VTX_WORD2, OFFSET, OFS, 15, 0) -BC_FIELD(VTX_WORD2, ENDIAN_SWAP, ES, 17, 16) 
-BC_FIELD(VTX_WORD2, CONST_BUF_NO_STRIDE, CBNS, 18, 18) -BC_FIELD(VTX_WORD2, MEGA_FETCH, MF, 19, 19) -BC_FIELD(VTX_WORD2, ALT_CONST, ALT_C, 20, 20) -BC_FIELD(VTX_WORD2, BUFFER_INDEX_MODE, BIM, 22, 21) -BC_RSRVD(VTX_WORD2, 31, 23) -BC_FORMAT_END(VTX_WORD2) - -BC_FORMAT_BEGIN_HW(VTX_WORD2, CM) -BC_FIELD(VTX_WORD2, OFFSET, OFS, 15, 0) -BC_FIELD(VTX_WORD2, ENDIAN_SWAP, ES, 17, 16) -BC_FIELD(VTX_WORD2, CONST_BUF_NO_STRIDE, CBNS, 18, 18) -BC_RSRVD(VTX_WORD2, 19, 19) -BC_FIELD(VTX_WORD2, ALT_CONST, ALT_C, 20, 20) -BC_FIELD(VTX_WORD2, BUFFER_INDEX_MODE, BIM, 22, 21) -BC_RSRVD(VTX_WORD2, 31, 23) -BC_FORMAT_END(VTX_WORD2) - -// TEX - -BC_FORMAT_BEGIN_HW(TEX_WORD0, R6) -BC_FIELD(TEX_WORD0, TEX_INST, T_INST, 4, 0) -BC_FIELD(TEX_WORD0, BC_FRAC_MODE, BFM, 5, 5) -BC_RSRVD(TEX_WORD0, 6, 6) -BC_FIELD(TEX_WORD0, FETCH_WHOLE_QUAD, FWQ, 7, 7) -BC_FIELD(TEX_WORD0, RESOURCE_ID, RSRC_ID, 15, 8) -BC_FIELD(TEX_WORD0, SRC_GPR, S_GPR, 22, 16) -BC_FIELD(TEX_WORD0, SRC_REL, SR, 23, 23) -BC_RSRVD(TEX_WORD0, 31, 24) -BC_FORMAT_END(TEX_WORD0) - -BC_FORMAT_BEGIN_HW(TEX_WORD0, R7) -BC_FIELD(TEX_WORD0, TEX_INST, T_INST, 4, 0) -BC_FIELD(TEX_WORD0, BC_FRAC_MODE, BFM, 5, 5) -BC_RSRVD(TEX_WORD0, 6, 6) -BC_FIELD(TEX_WORD0, FETCH_WHOLE_QUAD, FWQ, 7, 7) -BC_FIELD(TEX_WORD0, RESOURCE_ID, RSRC_ID, 15, 8) -BC_FIELD(TEX_WORD0, SRC_GPR, S_GPR, 22, 16) -BC_FIELD(TEX_WORD0, SRC_REL, SR, 23, 23) -BC_FIELD(TEX_WORD0, ALT_CONST, ALT_C, 24, 24) -BC_RSRVD(TEX_WORD0, 31, 25) -BC_FORMAT_END(TEX_WORD0) - -BC_FORMAT_BEGIN_HW(TEX_WORD0, EGCM) -BC_FIELD(TEX_WORD0, TEX_INST, T_INST, 4, 0) -BC_FIELD(TEX_WORD0, INST_MOD, IMOD, 6, 5) -BC_FIELD(TEX_WORD0, FETCH_WHOLE_QUAD, FWQ, 7, 7) -BC_FIELD(TEX_WORD0, RESOURCE_ID, RSRC_ID, 15, 8) -BC_FIELD(TEX_WORD0, SRC_GPR, S_GPR, 22, 16) -BC_FIELD(TEX_WORD0, SRC_REL, SR, 23, 23) -BC_FIELD(TEX_WORD0, ALT_CONST, ALT_C, 24, 24) -BC_FIELD(TEX_WORD0, RESOURCE_INDEX_MODE, RIM, 26, 25) -BC_FIELD(TEX_WORD0, SAMPLER_INDEX_MODE, SIM, 28, 27) -BC_RSRVD(TEX_WORD0, 31, 29) -BC_FORMAT_END(TEX_WORD0) - - 
-BC_FORMAT_BEGIN(TEX_WORD1) -BC_FIELD(TEX_WORD1, DST_GPR, D_GPR, 6, 0) -BC_FIELD(TEX_WORD1, DST_REL, DR, 7, 7) -BC_RSRVD(TEX_WORD1, 8, 8) -BC_FIELD(TEX_WORD1, DST_SEL_X, DSX, 11, 9) -BC_FIELD(TEX_WORD1, DST_SEL_Y, DSY, 14, 12) -BC_FIELD(TEX_WORD1, DST_SEL_Z, DSZ, 17, 15) -BC_FIELD(TEX_WORD1, DST_SEL_W, DSW, 20, 18) -BC_FIELD(TEX_WORD1, LOD_BIAS, LBIAS, 27, 21) -BC_FIELD(TEX_WORD1, COORD_TYPE_X, CTX, 28, 28) -BC_FIELD(TEX_WORD1, COORD_TYPE_Y, CTY, 29, 29) -BC_FIELD(TEX_WORD1, COORD_TYPE_Z, CTZ, 30, 30) -BC_FIELD(TEX_WORD1, COORD_TYPE_W, CTW, 31, 31) -BC_FORMAT_END(TEX_WORD1) - - -BC_FORMAT_BEGIN(TEX_WORD2) -BC_FIELD(TEX_WORD2, OFFSET_X, OFS_X, 4, 0) -BC_FIELD(TEX_WORD2, OFFSET_Y, OFS_Y, 9, 5) -BC_FIELD(TEX_WORD2, OFFSET_Z, OFS_Z, 14, 10) -BC_FIELD(TEX_WORD2, SAMPLER_ID, SAMP_ID, 19, 15) -BC_FIELD(TEX_WORD2, SRC_SEL_X, SSX, 22, 20) -BC_FIELD(TEX_WORD2, SRC_SEL_Y, SSY, 25, 23) -BC_FIELD(TEX_WORD2, SRC_SEL_Z, SSZ, 28, 26) -BC_FIELD(TEX_WORD2, SRC_SEL_W, SSW, 31, 29) -BC_FORMAT_END(TEX_WORD2) - -BC_FORMAT_BEGIN_HW(MEM_GDS_WORD0, EGCM) -BC_FIELD(MEM_GDS_WORD0, MEM_INST, M_INST, 4, 0) -BC_FIELD(MEM_GDS_WORD0, MEM_OP, M_OP, 10, 8) -BC_FIELD(MEM_GDS_WORD0, SRC_GPR, S_GPR, 17, 11) -BC_FIELD(MEM_GDS_WORD0, SRC_REL_MODE, SRM, 19, 18) -BC_FIELD(MEM_GDS_WORD0, SRC_SEL_X, SSX, 22, 20) -BC_FIELD(MEM_GDS_WORD0, SRC_SEL_Y, SSY, 25, 23) -BC_FIELD(MEM_GDS_WORD0, SRC_SEL_Z, SSZ, 28, 26) -BC_FORMAT_END(MEM_GDS_WORD0) - -BC_FORMAT_BEGIN_HW(MEM_GDS_WORD1, EGCM) -BC_FIELD(MEM_GDS_WORD1, DST_GPR, D_GPR, 6, 0) -BC_FIELD(MEM_GDS_WORD1, DST_REL_MODE, DRM, 8, 7) -BC_FIELD(MEM_GDS_WORD1, GDS_OP, G_OP, 14, 9) -BC_FIELD(MEM_GDS_WORD1, SRC_GPR, S_GPR, 22, 16) -BC_FIELD(MEM_GDS_WORD1, UAV_INDEX_MODE, U_IM, 25, 24) -BC_FIELD(MEM_GDS_WORD1, UAV_ID, U_ID, 29, 26) -BC_FIELD(MEM_GDS_WORD1, ALLOC_CONSUME, AC, 30, 30) -BC_FIELD(MEM_GDS_WORD1, BCAST_FIRST_REQ, BFR, 31, 31) -BC_FORMAT_END(MEM_GDS_WORD1) - -BC_FORMAT_BEGIN_HW(MEM_GDS_WORD2, EGCM) -BC_FIELD(MEM_GDS_WORD2, DST_SEL_X, DSX, 2, 0) 
-BC_FIELD(MEM_GDS_WORD2, DST_SEL_Y, DSY, 5, 3) -BC_FIELD(MEM_GDS_WORD2, DST_SEL_Z, DSZ, 8, 6) -BC_FIELD(MEM_GDS_WORD2, DST_SEL_W, DSW, 11, 9) -BC_FORMAT_END(MEM_GDS_WORD2) - -// MEM_RD_ -BC_FORMAT_BEGIN_HW(MEM_RD_WORD0, R7EGCM) -BC_FIELD(MEM_RD_WORD0, MEM_INST, M_INST, 4, 0) -BC_FIELD(MEM_RD_WORD0, ELEM_SIZE, ES, 6, 5) -BC_FIELD(MEM_RD_WORD0, FETCH_WHOLE_QUAD, FWQ, 7, 7) -BC_FIELD(MEM_RD_WORD0, MEM_OP, M_OP, 10, 8) -BC_FIELD(MEM_RD_WORD0, UNCACHED, UC, 11, 11) -BC_FIELD(MEM_RD_WORD0, INDEXED, INDEXED, 12, 12) -BC_FIELD(MEM_RD_WORD0, SRC_SEL_Y, SSY, 14, 13) -BC_FIELD(MEM_RD_WORD0, SRC_GPR, S_GPR, 22, 16) -BC_FIELD(MEM_RD_WORD0, SRC_REL, SR, 23, 23) -BC_FIELD(MEM_RD_WORD0, SRC_SEL_X, SSX, 25, 24) -BC_FIELD(MEM_RD_WORD0, BURST_COUNT, BURST, 29, 26) -BC_FIELD(MEM_RD_WORD0, LDS_REQ, LDS_REQ, 30, 30) -BC_FIELD(MEM_RD_WORD0, COALESCED_READ, COALESCED, 31, 31) -BC_FORMAT_END(MEM_RD_WORD0) - -BC_FORMAT_BEGIN_HW(MEM_RD_WORD1, R7EGCM) -BC_FIELD(MEM_RD_WORD1, DST_GPR, D_GPR, 6, 0) -BC_FIELD(MEM_RD_WORD1, DST_REL, DR, 7, 7) -BC_FIELD(MEM_RD_WORD1, DST_SEL_X, DSX, 11, 9) -BC_FIELD(MEM_RD_WORD1, DST_SEL_Y, DSY, 14, 12) -BC_FIELD(MEM_RD_WORD1, DST_SEL_Z, DSZ, 17, 15) -BC_FIELD(MEM_RD_WORD1, DST_SEL_W, DSW, 20, 18) -BC_FIELD(MEM_RD_WORD1, DATA_FORMAT, DFMT, 27, 22) -BC_FIELD(MEM_RD_WORD1, NUM_FORMAT_ALL, NFA, 29, 28) -BC_FIELD(MEM_RD_WORD1, FORMAT_COMP_ALL, FCA, 30, 30) -BC_FIELD(MEM_RD_WORD1, SRF_MODE_ALL, SMA, 31, 31) -BC_FORMAT_END(MEM_RD_WORD1) - -BC_FORMAT_BEGIN_HW(MEM_RD_WORD2, R7EGCM) -BC_FIELD(MEM_RD_WORD1, ARRAY_BASE, ARR_BS, 12, 0) -BC_FIELD(MEM_RD_WORD1, ENDIAN_SWAP, ES, 17, 16) -BC_FIELD(MEM_RD_WORD1, ARR_SIZE, ARR_SZ, 31, 20) -BC_FORMAT_END(MEM_RD_WORD2) diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp deleted file mode 100644 index da27c20..0000000 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ /dev/null @@ -1,1011 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of 
charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#define BCP_DEBUG 0 - -#if BCP_DEBUG -#define BCP_DUMP(q) do { q } while (0) -#else -#define BCP_DUMP(q) -#endif - -#include "r600_pipe.h" -#include "r600_shader.h" -#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1 - -#include - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" -#include "util/macros.h" - -namespace r600_sb { - -int bc_parser::decode() { - - dw = bc->bytecode; - bc_ndw = bc->ndw; - max_cf = 0; - - dec = new bc_decoder(ctx, dw, bc_ndw); - - shader_target t = TARGET_UNKNOWN; - - if (pshader) { - switch (bc->type) { - case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break; - case PIPE_SHADER_VERTEX: - t = pshader->vs_as_ls ? TARGET_LS : (pshader->vs_as_es ? 
TARGET_ES : TARGET_VS); - break; - case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break; - case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break; - case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break; - case PIPE_SHADER_TESS_EVAL: t = pshader->tes_as_es ? TARGET_ES : TARGET_VS; break; - default: assert(!"unknown shader target"); return -1; break; - } - } else { - if (bc->type == PIPE_SHADER_COMPUTE) - t = TARGET_COMPUTE; - else - t = TARGET_FETCH; - } - - sh = new shader(ctx, t, bc->debug_id); - sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE || bc->precise); - - int r = decode_shader(); - - delete dec; - - sh->ngpr = bc->ngpr; - sh->nstack = bc->nstack; - - return r; -} - -int bc_parser::decode_shader() { - int r = 0; - unsigned i = 0; - bool eop = false; - - sh->init(); - - do { - eop = false; - if ((r = decode_cf(i, eop))) - return r; - - } while (!eop || (i >> 1) < max_cf); - - return 0; -} - -int bc_parser::prepare() { - int r = 0; - if ((r = parse_decls())) - return r; - if ((r = prepare_ir())) - return r; - return 0; -} - -int bc_parser::parse_decls() { - - if (!pshader) { - if (gpr_reladdr) - sh->add_gpr_array(0, bc->ngpr, 0x0F); - - // compute shaders have some values preloaded in R0, R1 - sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */); - sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */); - return 0; - } - - if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) { - - assert(pshader->num_arrays); - - if (pshader->num_arrays) { - for (unsigned i = 0; i < pshader->num_arrays; ++i) { - r600_shader_array &a = pshader->arrays[i]; - sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); - } - } else { - sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F); - } - } - - // GS inputs can add indirect addressing - if (sh->target == TARGET_GS) { - if (pshader->num_arrays) { - for (unsigned i = 0; i < pshader->num_arrays; ++i) { - r600_shader_array &a = pshader->arrays[i]; - 
sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); - } - } - } - - if (sh->target == TARGET_VS || sh->target == TARGET_ES || sh->target == TARGET_HS || sh->target == TARGET_LS) - sh->add_input(0, 1, 0x0F); - else if (sh->target == TARGET_GS) { - sh->add_input(0, 1, 0x0F); - sh->add_input(1, 1, 0x0F); - } else if (sh->target == TARGET_COMPUTE) { - sh->add_input(0, 1, 0x0F); - sh->add_input(1, 1, 0x0F); - } - - bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN - && sh->target == TARGET_PS; - - bool ij_interpolators[6]; - memset(ij_interpolators, 0, sizeof(ij_interpolators)); - - for (unsigned i = 0; i < pshader->ninput; ++i) { - r600_shader_io & in = pshader->input[i]; - bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid); - sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F); - if (ps_interp && in.spi_sid) { - int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location); - if (k >= 0) { - ij_interpolators[k] |= true; - if (in.uses_interpolate_at_centroid) { - k = eg_get_interpolator_index(in.interpolate, TGSI_INTERPOLATE_LOC_CENTROID); - ij_interpolators[k] |= true; - } - } - } - } - - if (ps_interp) { - /* add the egcm ij interpolators to live inputs */ - unsigned num_ij = 0; - for (unsigned i = 0; i < ARRAY_SIZE(ij_interpolators); i++) { - num_ij += ij_interpolators[i]; - } - - unsigned mask = (1 << (2 * num_ij)) - 1; - unsigned gpr = 0; - - while (mask) { - sh->add_input(gpr, true, mask & 0x0F); - ++gpr; - mask >>= 4; - } - } - - return 0; -} - -int bc_parser::decode_cf(unsigned &i, bool &eop) { - - int r; - - cf_node *cf = sh->create_cf(); - sh->root->push_back(cf); - - unsigned id = i >> 1; - - cf->bc.id = id; - - if (cf_map.size() < id + 1) - cf_map.resize(id + 1); - - cf_map[id] = cf; - - if ((r = dec->decode_cf(i, cf->bc))) - return r; - - cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; - - if (flags & CF_ALU) { - if ((r = decode_alu_clause(cf))) - return r; - } else if (flags & CF_FETCH) { - if ((r = 
decode_fetch_clause(cf))) - return r; - } else if (flags & CF_EXP) { - if (cf->bc.rw_rel) - gpr_reladdr = true; - assert(!cf->bc.rw_rel); - } else if (flags & CF_MEM) { - if (cf->bc.rw_rel) - gpr_reladdr = true; - assert(!cf->bc.rw_rel); - } else if (flags & CF_BRANCH) { - if (cf->bc.addr > max_cf) - max_cf = cf->bc.addr; - } - - eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END || - cf->bc.op == CF_OP_RET; - return 0; -} - -int bc_parser::decode_alu_clause(cf_node* cf) { - unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt; - - cf->subtype = NST_ALU_CLAUSE; - - cgroup = 0; - memset(slots[0], 0, 5*sizeof(slots[0][0])); - - UNUSED unsigned ng = 0; - - do { - decode_alu_group(cf, i, gcnt); - assert(gcnt <= cnt); - cnt -= gcnt; - ng++; - } while (cnt); - - return 0; -} - -int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { - int r; - alu_node *n; - alu_group_node *g = sh->create_alu_group(); - - cgroup = !cgroup; - memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); - gcnt = 0; - - unsigned literal_mask = 0; - - do { - n = sh->create_alu(); - g->push_back(n); - - if ((r = dec->decode_alu(i, n->bc))) - return r; - - if (!sh->assign_slot(n, slots[cgroup])) { - assert(!"alu slot assignment failed"); - return -1; - } - - gcnt++; - - } while (gcnt <= 5 && !n->bc.last); - - assert(n->bc.last); - - for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { - n = static_cast(*I); - - if (n->bc.dst_rel) - gpr_reladdr = true; - - for (int k = 0; k < n->bc.op_ptr->src_count; ++k) { - bc_alu_src &src = n->bc.src[k]; - if (src.rel) - gpr_reladdr = true; - if (src.sel == ALU_SRC_LITERAL) { - literal_mask |= (1 << src.chan); - src.value.u = dw[i + src.chan]; - } - } - } - - unsigned literal_ndw = 0; - while (literal_mask) { - g->literals.push_back(dw[i + literal_ndw]); - literal_ndw += 1; - literal_mask >>= 1; - } - - literal_ndw = (literal_ndw + 1) & ~1u; - - i += literal_ndw; - gcnt += literal_ndw >> 1; - - cf->push_back(g); - return 0; -} 
- -int bc_parser::prepare_alu_clause(cf_node* cf) { - - // loop over alu groups - for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { - assert(I->subtype == NST_ALU_GROUP); - alu_group_node *g = static_cast(*I); - prepare_alu_group(cf, g); - } - - return 0; -} - -void bc_parser::save_set_cf_index(value *val, unsigned idx) -{ - assert(idx <= 1); - assert(val); - cf_index_value[idx] = val; -} -value *bc_parser::get_cf_index_value(unsigned idx) -{ - assert(idx <= 1); - assert(cf_index_value[idx]); - return cf_index_value[idx]; -} -void bc_parser::save_mova(alu_node *mova) -{ - assert(mova); - this->mova = mova; -} -alu_node *bc_parser::get_mova() -{ - assert(mova); - return mova; -} - -int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { - - alu_node *n; - - cgroup = !cgroup; - memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); - - for (node_iterator I = g->begin(), E = g->end(); - I != E; ++I) { - n = static_cast(*I); - bool ubo_indexing[2] = {}; - - if (!sh->assign_slot(n, slots[cgroup])) { - assert(!"alu slot assignment failed"); - return -1; - } - - unsigned src_count = n->bc.op_ptr->src_count; - - if (ctx.alu_slots(n->bc.op) & AF_4SLOT) - n->flags |= NF_ALU_4SLOT; - - if (ctx.alu_slots(n->bc.op) & AF_2SLOT) - n->flags |= NF_ALU_2SLOT; - - n->src.resize(src_count); - - unsigned flags = n->bc.op_ptr->flags; - - if (flags & AF_LDS) { - bool need_rw = false, need_oqa = false, need_oqb = false; - int ndst = 0, ncount = 0; - - /* all non-read operations have side effects */ - if (n->bc.op != LDS_OP2_LDS_READ2_RET && - n->bc.op != LDS_OP1_LDS_READ_REL_RET && - n->bc.op != LDS_OP1_LDS_READ_RET) { - n->flags |= NF_DONT_KILL; - ndst++; - need_rw = true; - } - - if (n->bc.op >= LDS_OP2_LDS_ADD_RET && n->bc.op <= LDS_OP1_LDS_USHORT_READ_RET) { - need_oqa = true; - ndst++; - } - - if (n->bc.op == LDS_OP2_LDS_READ2_RET || n->bc.op == LDS_OP1_LDS_READ_REL_RET) { - need_oqb = true; - ndst++; - } - - n->dst.resize(ndst); - if (need_oqa) - 
n->dst[ncount++] = sh->get_special_value(SV_LDS_OQA); - if (need_oqb) - n->dst[ncount++] = sh->get_special_value(SV_LDS_OQB); - if (need_rw) - n->dst[ncount++] = sh->get_special_value(SV_LDS_RW); - - n->flags |= NF_DONT_MOVE | NF_DONT_HOIST; - - } else if (flags & AF_PRED) { - n->dst.resize(3); - if (n->bc.update_pred) - n->dst[1] = sh->get_special_value(SV_ALU_PRED); - if (n->bc.update_exec_mask) - n->dst[2] = sh->get_special_value(SV_EXEC_MASK); - - n->flags |= NF_DONT_HOIST; - - } else if (flags & AF_KILL) { - - n->dst.resize(2); - n->dst[1] = sh->get_special_value(SV_VALID_MASK); - sh->set_uses_kill(); - - n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | - NF_DONT_KILL | NF_SCHEDULE_EARLY; - - } else { - n->dst.resize(1); - } - - if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) { - // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX - // DCE will kill this op - save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1); - } else if (flags & AF_MOVA) { - - n->dst[0] = sh->get_special_value(SV_AR_INDEX); - save_mova(n); - - n->flags |= NF_DONT_HOIST; - - } else if ((n->bc.op_ptr->src_count == 3 || n->bc.write_mask) && !(flags & AF_LDS)) { - assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X); - - value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan, - n->bc.dst_rel); - - n->dst[0] = v; - } - - if (n->bc.pred_sel) { - sh->has_alu_predication = true; - n->pred = sh->get_special_value(SV_ALU_PRED); - } - - for (unsigned s = 0; s < src_count; ++s) { - bc_alu_src &src = n->bc.src[s]; - - if (src.sel == ALU_SRC_LITERAL) { - n->src[s] = sh->get_const_value(src.value); - } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) { - unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ? 
- ((unsigned)SLOT_TRANS) : src.chan; - - // XXX shouldn't happen but llvm backend uses PS on cayman - if (prev_slot == SLOT_TRANS && ctx.is_cayman()) - prev_slot = SLOT_X; - - alu_node *prev_alu = slots[pgroup][prev_slot]; - - assert(prev_alu); - - if (!prev_alu->dst[0]) { - value * t = sh->create_temp_value(); - prev_alu->dst[0] = t; - } - - value *d = prev_alu->dst[0]; - - if (d->is_rel()) { - d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr, - prev_alu->bc.dst_chan, - prev_alu->bc.dst_rel); - } - - n->src[s] = d; - } else if (ctx.is_kcache_sel(src.sel)) { - unsigned sel = src.sel, kc_addr; - unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1); - - bc_kcache &kc = cf->bc.kc[kc_set]; - kc_addr = (kc.addr << 4) + (sel & 0x1F); - n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode); - - if (kc.index_mode != KC_INDEX_NONE) { - assert(kc.index_mode != KC_LOCK_LOOP); - ubo_indexing[kc.index_mode - KC_INDEX_0] = true; - } - } else if (src.sel < MAX_GPR) { - value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel); - - n->src[s] = v; - - } else if (src.sel >= ALU_SRC_PARAM_OFFSET) { - // using slot for value channel because in fact the slot - // determines the channel that is loaded by INTERP_LOAD_P0 - // (and maybe some others). 
- // otherwise GVN will consider INTERP_LOAD_P0s with the same - // param index as equal instructions and leave only one of them - n->src[s] = sh->get_special_ro_value(sel_chan(src.sel, - n->bc.slot)); - } else if (ctx.is_lds_oq(src.sel)) { - switch (src.sel) { - case ALU_SRC_LDS_OQ_A: - case ALU_SRC_LDS_OQ_B: - assert(!"Unsupported LDS queue access in SB"); - break; - case ALU_SRC_LDS_OQ_A_POP: - n->src[s] = sh->get_special_value(SV_LDS_OQA); - break; - case ALU_SRC_LDS_OQ_B_POP: - n->src[s] = sh->get_special_value(SV_LDS_OQB); - break; - } - n->flags |= NF_DONT_HOIST | NF_DONT_MOVE; - - } else { - switch (src.sel) { - case ALU_SRC_0: - n->src[s] = sh->get_const_value(0); - break; - case ALU_SRC_0_5: - n->src[s] = sh->get_const_value(0.5f); - break; - case ALU_SRC_1: - n->src[s] = sh->get_const_value(1.0f); - break; - case ALU_SRC_1_INT: - n->src[s] = sh->get_const_value(1); - break; - case ALU_SRC_M_1_INT: - n->src[s] = sh->get_const_value(-1); - break; - default: - n->src[s] = sh->get_special_ro_value(src.sel); - break; - } - } - } - - // add UBO index values if any as dependencies - if (ubo_indexing[0]) { - n->src.push_back(get_cf_index_value(0)); - } - if (ubo_indexing[1]) { - n->src.push_back(get_cf_index_value(1)); - } - - if ((flags & AF_MOVA) && (n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) && - ctx.is_cayman()) - // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX - save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1); - } - - // pack multislot instructions into alu_packed_node - - alu_packed_node *p = NULL; - for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) { - N = I + 1; - alu_node *a = static_cast(*I); - unsigned sflags = a->bc.slot_flags; - - if (sflags == AF_4V || sflags == AF_2V || (ctx.is_cayman() && sflags == AF_S)) { - if (!p) - p = sh->create_alu_packed(); - - a->remove(); - p->push_back(a); - if (sflags == AF_2V && 
p->count() == 2) { - g->push_front(p); - p = NULL; - } - } - } - - if (p) { - g->push_front(p); - - if (p->count() == 3 && ctx.is_cayman()) { - // cayman's scalar instruction that can use 3 or 4 slots - - // FIXME for simplicity we'll always add 4th slot, - // but probably we might want to always remove 4th slot and make - // sure that regalloc won't choose 'w' component for dst - - alu_node *f = static_cast(p->first); - alu_node *a = sh->create_alu(); - a->src = f->src; - a->dst.resize(f->dst.size()); - a->bc = f->bc; - a->bc.slot = SLOT_W; - p->push_back(a); - } - } - - return 0; -} - -int bc_parser::decode_fetch_clause(cf_node* cf) { - int r; - unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1; - - if (cf->bc.op_ptr->flags & FF_GDS) - cf->subtype = NST_GDS_CLAUSE; - else - cf->subtype = NST_TEX_CLAUSE; - - while (cnt--) { - fetch_node *n = sh->create_fetch(); - cf->push_back(n); - if ((r = dec->decode_fetch(i, n->bc))) - return r; - if (n->bc.src_rel || n->bc.dst_rel) - gpr_reladdr = true; - - } - return 0; -} - -int bc_parser::prepare_fetch_clause(cf_node *cf) { - - vvec grad_v, grad_h, texture_offsets; - - for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { - - fetch_node *n = static_cast(*I); - assert(n->is_valid()); - - unsigned flags = n->bc.op_ptr->flags; - - unsigned vtx = flags & FF_VTX; - unsigned gds = flags & FF_GDS; - unsigned num_src = gds ? 2 : vtx ? 
ctx.vtx_src_num : 4; - - n->dst.resize(4); - - if (gds) { - n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL; - } - if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) { - sh->uses_gradients = true; - } - - if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) { - - vvec *grad = NULL; - - switch (n->bc.op) { - case FETCH_OP_SET_GRADIENTS_V: - grad = &grad_v; - break; - case FETCH_OP_SET_GRADIENTS_H: - grad = &grad_h; - break; - case FETCH_OP_SET_TEXTURE_OFFSETS: - grad = &texture_offsets; - break; - default: - assert(!"unexpected SET_GRAD instruction"); - return -1; - } - - if (grad->empty()) - grad->resize(4); - - for(unsigned s = 0; s < 4; ++s) { - unsigned sw = n->bc.src_sel[s]; - if (sw <= SEL_W) - (*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr, - sw, false); - else if (sw == SEL_0) - (*grad)[s] = sh->get_const_value(0.0f); - else if (sw == SEL_1) - (*grad)[s] = sh->get_const_value(1.0f); - } - } else { - // Fold source values for instructions with hidden target values in to the instructions - // using them. 
The set instructions are later re-emitted by bc_finalizer - if (flags & FF_USEGRAD) { - n->src.resize(12); - std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4); - std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8); - } else if (flags & FF_USE_TEXTURE_OFFSETS) { - n->src.resize(8); - std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4); - } else { - n->src.resize(4); - } - - for(int s = 0; s < 4; ++s) { - if (n->bc.dst_sel[s] != SEL_MASK) - n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false); - // NOTE: it doesn't matter here which components of the result we - // are using, but original n->bc.dst_sel should be taken into - // account when building the bytecode - } - for(unsigned s = 0; s < num_src; ++s) { - if (n->bc.src_sel[s] <= SEL_W) - n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr, - n->bc.src_sel[s], false); - } - - // Scheduler will emit the appropriate instructions to set CF_IDX0/1 - if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) { - n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1)); - } - if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) { - n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1)); - } - } - - if (n->bc.op == FETCH_OP_READ_SCRATCH) { - n->src.push_back(sh->get_special_value(SV_SCRATCH)); - n->dst.push_back(sh->get_special_value(SV_SCRATCH)); - } - } - - return 0; -} - -int bc_parser::prepare_ir() { - - for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) { - cf_node *c = *I; - - if (!c) - continue; - - unsigned flags = c->bc.op_ptr->flags; - - if (flags & CF_ALU) { - prepare_alu_clause(c); - } else if (flags & CF_FETCH) { - prepare_fetch_clause(c); - } else if (c->bc.op == CF_OP_CALL_FS) { - sh->init_call_fs(c); - c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE; - } else if (flags & CF_LOOP_START) { - prepare_loop(c); - } else if (c->bc.op == CF_OP_JUMP) { - prepare_if(c); - } else if (c->bc.op == 
CF_OP_LOOP_END) { - loop_stack.pop(); - } else if (c->bc.op == CF_OP_LOOP_CONTINUE) { - assert(!loop_stack.empty()); - repeat_node *rep = sh->create_repeat(loop_stack.top()); - if (c->parent->first != c) - rep->move(c->parent->first, c); - c->replace_with(rep); - sh->simplify_dep_rep(rep); - } else if (c->bc.op == CF_OP_LOOP_BREAK) { - assert(!loop_stack.empty()); - depart_node *dep = sh->create_depart(loop_stack.top()); - if (c->parent->first != c) - dep->move(c->parent->first, c); - c->replace_with(dep); - sh->simplify_dep_rep(dep); - } else if (flags & CF_EXP) { - - // unroll burst exports - - assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE); - - c->bc.set_op(CF_OP_EXPORT); - - unsigned burst_count = c->bc.burst_count; - unsigned eop = c->bc.end_of_program; - - c->bc.end_of_program = 0; - c->bc.burst_count = 0; - - do { - c->src.resize(4); - - for(int s = 0; s < 4; ++s) { - switch (c->bc.sel[s]) { - case SEL_0: - c->src[s] = sh->get_const_value(0.0f); - break; - case SEL_1: - c->src[s] = sh->get_const_value(1.0f); - break; - case SEL_MASK: - break; - default: - if (c->bc.sel[s] <= SEL_W) - c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr, - c->bc.sel[s], false); - else - assert(!"invalid src_sel for export"); - } - } - - if (!burst_count--) - break; - - cf_node *cf_next = sh->create_cf(); - cf_next->bc = c->bc; - ++cf_next->bc.rw_gpr; - ++cf_next->bc.array_base; - - c->insert_after(cf_next); - c = cf_next; - - } while (1); - - c->bc.end_of_program = eop; - } else if (flags & CF_MEM) { - - unsigned burst_count = c->bc.burst_count; - unsigned eop = c->bc.end_of_program; - - c->bc.end_of_program = 0; - c->bc.burst_count = 0; - - do { - - if (ctx.hw_class == HW_CLASS_R600 && c->bc.op == CF_OP_MEM_SCRATCH && - (c->bc.type == 2 || c->bc.type == 3)) { - c->dst.resize(4); - for(int s = 0; s < 4; ++s) { - if (c->bc.comp_mask & (1 << s)) - c->dst[s] = - sh->get_gpr_value(true, c->bc.rw_gpr, s, false); - } - } else { - c->src.resize(4); - - - for(int s = 
0; s < 4; ++s) { - if (c->bc.comp_mask & (1 << s)) - c->src[s] = - sh->get_gpr_value(true, c->bc.rw_gpr, s, false); - } - } - - if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write - c->src.resize(8); - for(int s = 0; s < 3; ++s) { - c->src[4 + s] = - sh->get_gpr_value(true, c->bc.index_gpr, s, false); - } - - // FIXME probably we can relax it a bit - c->flags |= NF_DONT_HOIST | NF_DONT_MOVE; - } - - if (flags & CF_EMIT) { - // Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX - c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); - c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); - if (sh->target == TARGET_ES) { - // For ES shaders this is an export - c->flags |= NF_DONT_KILL; - } - } - else if (c->bc.op == CF_OP_MEM_SCRATCH) { - c->src.push_back(sh->get_special_value(SV_SCRATCH)); - c->dst.push_back(sh->get_special_value(SV_SCRATCH)); - } - - if (!burst_count--) - break; - - cf_node *cf_next = sh->create_cf(); - cf_next->bc = c->bc; - ++cf_next->bc.rw_gpr; - - // FIXME is it correct? 
- cf_next->bc.array_base += cf_next->bc.elem_size + 1; - - c->insert_after(cf_next); - c = cf_next; - } while (1); - - c->bc.end_of_program = eop; - - } else if (flags & CF_EMIT) { - /* quick peephole */ - cf_node *prev = static_cast(c->prev); - if (c->bc.op == CF_OP_CUT_VERTEX && - prev && prev->is_valid() && - prev->bc.op == CF_OP_EMIT_VERTEX && - c->bc.count == prev->bc.count) { - prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX); - prev->bc.end_of_program = c->bc.end_of_program; - c->remove(); - } - else { - c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE; - - c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); - c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); - } - } else if (c->bc.op == CF_OP_WAIT_ACK) { - c->src.push_back(sh->get_special_value(SV_SCRATCH)); - c->dst.push_back(sh->get_special_value(SV_SCRATCH)); - } - } - - assert(loop_stack.empty()); - return 0; -} - -int bc_parser::prepare_loop(cf_node* c) { - assert(c->bc.addr-1 < cf_map.size()); - - cf_node *end = cf_map[c->bc.addr - 1]; - assert(end->bc.op == CF_OP_LOOP_END); - assert(c->parent == end->parent); - - region_node *reg = sh->create_region(); - repeat_node *rep = sh->create_repeat(reg); - - reg->push_back(rep); - c->insert_before(reg); - rep->move(c, end->next); - - reg->src_loop = true; - - loop_stack.push(reg); - return 0; -} - -int bc_parser::prepare_if(cf_node* c) { - assert(c->bc.addr-1 < cf_map.size()); - cf_node *c_else = NULL, *end = cf_map[c->bc.addr]; - - if (!end) - return 0; // not quite sure how this happens, malformed input? 
- - BCP_DUMP( - sblog << "parsing JUMP @" << c->bc.id; - sblog << "\n"; - ); - - if (end->bc.op == CF_OP_ELSE) { - BCP_DUMP( - sblog << " found ELSE : "; - dump::dump_op(end); - sblog << "\n"; - ); - - c_else = end; - end = cf_map[c_else->bc.addr]; - } else { - BCP_DUMP( - sblog << " no else\n"; - ); - - c_else = end; - } - - if (c_else->parent != c->parent) - c_else = NULL; - - if (end && end->parent != c->parent) - end = NULL; - - region_node *reg = sh->create_region(); - - depart_node *dep2 = sh->create_depart(reg); - depart_node *dep = sh->create_depart(reg); - if_node *n_if = sh->create_if(); - - c->insert_before(reg); - - if (c_else != end) - dep->move(c_else, end); - dep2->move(c, end); - - reg->push_back(dep); - dep->push_front(n_if); - n_if->push_back(dep2); - - n_if->cond = sh->get_special_value(SV_EXEC_MASK); - - return 0; -} - - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_context.cpp b/src/gallium/drivers/r600/sb/sb_context.cpp deleted file mode 100644 index 2734b24..0000000 --- a/src/gallium/drivers/r600/sb/sb_context.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include "sb_bc.h" - -namespace r600_sb { - -sb_log sblog; - -unsigned sb_context::dump_pass = 0; -unsigned sb_context::dump_stat = 0; -unsigned sb_context::dry_run = 0; -unsigned sb_context::no_fallback = 0; -unsigned sb_context::safe_math = 0; - -unsigned sb_context::dskip_start = 0; -unsigned sb_context::dskip_end = 0; -unsigned sb_context::dskip_mode = 0; - -int sb_context::init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass) { - if (chip == HW_CHIP_UNKNOWN || cclass == HW_CLASS_UNKNOWN) - return -1; - - this->isa = isa; - - hw_chip = chip; - hw_class = cclass; - - alu_temp_gprs = 4; - - max_fetch = is_r600() ? 8 : 16; - - has_trans = !is_cayman(); - - vtx_src_num = 1; - - num_slots = has_trans ? 
5 : 4; - - uses_mova_gpr = is_r600() && chip != HW_CHIP_RV670; - - r6xx_gpr_index_workaround = is_r600() && chip != HW_CHIP_RV670 && chip != HW_CHIP_RS780 && chip != HW_CHIP_RS880; - - switch (chip) { - case HW_CHIP_RV610: - case HW_CHIP_RS780: - case HW_CHIP_RV620: - case HW_CHIP_RS880: - wavefront_size = 16; - stack_entry_size = 8; - break; - case HW_CHIP_RV630: - case HW_CHIP_RV635: - case HW_CHIP_RV730: - case HW_CHIP_RV710: - case HW_CHIP_PALM: - case HW_CHIP_CEDAR: - wavefront_size = 32; - stack_entry_size = 8; - break; - default: - wavefront_size = 64; - stack_entry_size = 4; - break; - } - - stack_workaround_8xx = needs_8xx_stack_workaround(); - stack_workaround_9xx = needs_9xx_stack_workaround(); - - return 0; -} - -const char* sb_context::get_hw_class_name() { - switch (hw_class) { -#define TRANSLATE_HW_CLASS(c) case HW_CLASS_##c: return #c - TRANSLATE_HW_CLASS(R600); - TRANSLATE_HW_CLASS(R700); - TRANSLATE_HW_CLASS(EVERGREEN); - TRANSLATE_HW_CLASS(CAYMAN); -#undef TRANSLATE_HW_CLASS - default: - assert(!"unknown gfx level"); - return "INVALID_CHIP_CLASS"; - } -} - -const char* sb_context::get_hw_chip_name() { - switch (hw_chip) { -#define TRANSLATE_CHIP(c) case HW_CHIP_##c: return #c - TRANSLATE_CHIP(R600); - TRANSLATE_CHIP(RV610); - TRANSLATE_CHIP(RV630); - TRANSLATE_CHIP(RV670); - TRANSLATE_CHIP(RV620); - TRANSLATE_CHIP(RV635); - TRANSLATE_CHIP(RS780); - TRANSLATE_CHIP(RS880); - TRANSLATE_CHIP(RV770); - TRANSLATE_CHIP(RV730); - TRANSLATE_CHIP(RV710); - TRANSLATE_CHIP(RV740); - TRANSLATE_CHIP(CEDAR); - TRANSLATE_CHIP(REDWOOD); - TRANSLATE_CHIP(JUNIPER); - TRANSLATE_CHIP(CYPRESS); - TRANSLATE_CHIP(HEMLOCK); - TRANSLATE_CHIP(PALM); - TRANSLATE_CHIP(SUMO); - TRANSLATE_CHIP(SUMO2); - TRANSLATE_CHIP(BARTS); - TRANSLATE_CHIP(TURKS); - TRANSLATE_CHIP(CAICOS); - TRANSLATE_CHIP(CAYMAN); - TRANSLATE_CHIP(ARUBA); -#undef TRANSLATE_CHIP - - default: - assert(!"unknown chip"); - return "INVALID_CHIP"; - } -} - -} // namespace r600_sb diff --git 
a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp deleted file mode 100644 index 7c387d2..0000000 --- a/src/gallium/drivers/r600/sb/sb_core.cpp +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#define SB_RA_SCHED_CHECK DEBUG - -#include "util/os_time.h" -#include "r600_pipe.h" -#include "r600_shader.h" - -#include "sb_public.h" - -#include -#include - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" -#include "sb_sched.h" - -using namespace r600_sb; - -static sb_hw_class translate_chip_class(enum amd_gfx_level cc); -static sb_hw_chip translate_chip(enum radeon_family rf); - -sb_context *r600_sb_context_create(struct r600_context *rctx) { - - sb_context *sctx = new sb_context(); - - if (sctx->init(rctx->isa, translate_chip(rctx->b.family), - translate_chip_class(rctx->b.gfx_level))) { - delete sctx; - sctx = NULL; - } - - unsigned df = rctx->screen->b.debug_flags; - - sb_context::dump_pass = df & DBG_SB_DUMP; - sb_context::dump_stat = df & DBG_SB_STAT; - sb_context::dry_run = df & DBG_SB_DRY_RUN; - sb_context::no_fallback = df & DBG_SB_NO_FALLBACK; - sb_context::safe_math = df & DBG_SB_SAFEMATH; - - sb_context::dskip_start = debug_get_num_option("R600_SB_DSKIP_START", 0); - sb_context::dskip_end = debug_get_num_option("R600_SB_DSKIP_END", 0); - sb_context::dskip_mode = debug_get_num_option("R600_SB_DSKIP_MODE", 0); - - return sctx; -} - -void r600_sb_context_destroy(void * sctx) { - if (sctx) { - sb_context *ctx = static_cast(sctx); - - if (sb_context::dump_stat) { - sblog << "\ncontext src stats: "; - ctx->src_stats.dump(); - sblog << "context opt stats: "; - ctx->opt_stats.dump(); - sblog << "context diff: "; - ctx->src_stats.dump_diff(ctx->opt_stats); - } - - delete ctx; - } -} - -int r600_sb_bytecode_process(struct r600_context *rctx, - struct r600_bytecode *bc, - struct r600_shader *pshader, - int dump_bytecode, - int optimize) { - int r = 0; - unsigned shader_id = bc->debug_id; - - sb_context *ctx = (sb_context *)rctx->sb_context; - if (!ctx) { - rctx->sb_context = ctx = r600_sb_context_create(rctx); - } - - int64_t time_start = 0; - if (sb_context::dump_stat) { - time_start = 
os_time_get_nano(); - } - - SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; ); - - bc_parser parser(*ctx, bc, pshader); - - if ((r = parser.decode())) { - assert(!"sb: bytecode decoding error"); - return r; - } - - shader *sh = parser.get_shader(); - - if (dump_bytecode) { - bc_dump(*sh, bc->bytecode, bc->ndw).run(); - } - - if (!optimize) { - delete sh; - return 0; - } - - if (sh->target != TARGET_FETCH) { - sh->src_stats.ndw = bc->ndw; - sh->collect_stats(false); - } - - /* skip some shaders (use shaders from default backend) - * dskip_start - range start, dskip_end - range_end, - * e.g. start = 5, end = 6 means shaders 5 & 6 - * - * dskip_mode == 0 - disabled, - * dskip_mode == 1 - don't process the shaders from the [start;end] range - * dskip_mode == 2 - process only the shaders from the range - */ - if (sb_context::dskip_mode) { - if ((sb_context::dskip_start <= shader_id && - shader_id <= sb_context::dskip_end) == - (sb_context::dskip_mode == 1)) { - sblog << "sb: skipped shader " << shader_id << " : " << "[" - << sb_context::dskip_start << "; " - << sb_context::dskip_end << "] mode " - << sb_context::dskip_mode << "\n"; - return 0; - } - } - - if ((r = parser.prepare())) { - assert(!"sb: bytecode parsing error"); - return r; - } - - SB_DUMP_PASS( sblog << "\n\n###### after parse\n"; sh->dump_ir(); ); - -#define SB_RUN_PASS(n, dump) \ - do { \ - r = n(*sh).run(); \ - if (r) { \ - sblog << "sb: error (" << r << ") in the " << #n << " pass.\n"; \ - if (sb_context::no_fallback) \ - return r; \ - sblog << "sb: using unoptimized bytecode...\n"; \ - delete sh; \ - return 0; \ - } \ - if (dump) { \ - SB_DUMP_PASS( sblog << "\n\n###### after " << #n << "\n"; \ - sh->dump_ir();); \ - } \ - assert(!r); \ - } while (0) - - SB_RUN_PASS(ssa_prepare, 0); - SB_RUN_PASS(ssa_rename, 1); - - if (sh->has_alu_predication) - SB_RUN_PASS(psi_ops, 1); - - SB_RUN_PASS(liveness, 0); - - sh->dce_flags = DF_REMOVE_DEAD | DF_EXPAND; - SB_RUN_PASS(dce_cleanup, 0); - 
SB_RUN_PASS(def_use, 0); - - sh->set_undef(sh->root->live_before); - - // if conversion breaks the dependency tracking between CF_EMIT ops when it removes - // the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS - if ((sh->target != TARGET_GS && sh->target != TARGET_HS) || pshader->needs_scratch_space) - SB_RUN_PASS(if_conversion, 1); - - // if_conversion breaks info about uses, but next pass (peephole) - // doesn't need it, so we can skip def/use update here - // until it's really required - //SB_RUN_PASS(def_use, 0); - - SB_RUN_PASS(peephole, 1); - SB_RUN_PASS(def_use, 0); - - SB_RUN_PASS(gvn, 1); - - SB_RUN_PASS(def_use, 1); - - sh->dce_flags = DF_REMOVE_DEAD | DF_REMOVE_UNUSED; - SB_RUN_PASS(dce_cleanup, 1); - - SB_RUN_PASS(ra_split, 0); - SB_RUN_PASS(def_use, 0); - - // create 'basic blocks'. it's not like we build CFG, they are just - // container nodes in the correct locations for code placement - sh->create_bbs(); - - SB_RUN_PASS(gcm, 1); - - sh->compute_interferences = true; - SB_RUN_PASS(liveness, 0); - - sh->dce_flags = DF_REMOVE_DEAD; - SB_RUN_PASS(dce_cleanup, 1); - - SB_RUN_PASS(ra_coalesce, 1); - SB_RUN_PASS(ra_init, 1); - - SB_RUN_PASS(post_scheduler, 1); - - sh->expand_bbs(); - -#if SB_RA_SCHED_CHECK - // check code correctness after regalloc/scheduler - SB_RUN_PASS(ra_checker, 0); -#endif - - SB_RUN_PASS(bc_finalizer, 0); - - sh->optimized = true; - - bc_builder builder(*sh); - - if ((r = builder.build())) { - assert(0); - return r; - } - - bytecode &nbc = builder.get_bytecode(); - - if (dump_bytecode) { - bc_dump(*sh, &nbc).run(); - } - - if (!sb_context::dry_run) { - - free(bc->bytecode); - bc->ndw = nbc.ndw(); - bc->bytecode = (uint32_t*) malloc(bc->ndw << 2); - nbc.write_data(bc->bytecode); - - bc->ngpr = sh->ngpr; - bc->nstack = sh->nstack; - } else { - SB_DUMP_STAT( sblog << "sb: dry run: optimized bytecode is not used\n"; ); - } - - if (sb_context::dump_stat) { - int64_t t = os_time_get_nano() - time_start; - - sblog << "sb: 
processing shader " << shader_id << " done ( " - << ((double)t)/1000000.0 << " ms ).\n"; - - sh->opt_stats.ndw = bc->ndw; - sh->collect_stats(true); - - sblog << "src stats: "; - sh->src_stats.dump(); - sblog << "opt stats: "; - sh->opt_stats.dump(); - sblog << "diff: "; - sh->src_stats.dump_diff(sh->opt_stats); - } - - delete sh; - return 0; -} - -static sb_hw_chip translate_chip(enum radeon_family rf) { - switch (rf) { - -#define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c - TRANSLATE_CHIP(R600); - TRANSLATE_CHIP(RV610); - TRANSLATE_CHIP(RV630); - TRANSLATE_CHIP(RV670); - TRANSLATE_CHIP(RV620); - TRANSLATE_CHIP(RV635); - TRANSLATE_CHIP(RS780); - TRANSLATE_CHIP(RS880); - TRANSLATE_CHIP(RV770); - TRANSLATE_CHIP(RV730); - TRANSLATE_CHIP(RV710); - TRANSLATE_CHIP(RV740); - TRANSLATE_CHIP(CEDAR); - TRANSLATE_CHIP(REDWOOD); - TRANSLATE_CHIP(JUNIPER); - TRANSLATE_CHIP(CYPRESS); - TRANSLATE_CHIP(HEMLOCK); - TRANSLATE_CHIP(PALM); - TRANSLATE_CHIP(SUMO); - TRANSLATE_CHIP(SUMO2); - TRANSLATE_CHIP(BARTS); - TRANSLATE_CHIP(TURKS); - TRANSLATE_CHIP(CAICOS); - TRANSLATE_CHIP(CAYMAN); - TRANSLATE_CHIP(ARUBA); -#undef TRANSLATE_CHIP - - default: - assert(!"unknown chip"); - return HW_CHIP_UNKNOWN; - } -} - -static sb_hw_class translate_chip_class(enum amd_gfx_level cc) { - switch(cc) { - case R600: return HW_CLASS_R600; - case R700: return HW_CLASS_R700; - case EVERGREEN: return HW_CLASS_EVERGREEN; - case CAYMAN: return HW_CLASS_CAYMAN; - - default: - assert(!"unknown gfx level"); - return HW_CLASS_UNKNOWN; - } -} diff --git a/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp b/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp deleted file mode 100644 index abae2bf..0000000 --- a/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software 
without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include "sb_shader.h" - -#include "sb_pass.h" - -namespace r600_sb { - -int dce_cleanup::run() { - int r; - - // Run cleanup for as long as there are unused nodes. 
- do { - nodes_changed = false; - r = vpass::run(); - } while (r == 0 && nodes_changed); - - return r; -} - -bool dce_cleanup::visit(node& n, bool enter) { - if (enter) { - } else { - if (n.flags & NF_DEAD) - n.remove(); - else - cleanup_dst(n); - } - return true; -} - -bool dce_cleanup::visit(alu_group_node& n, bool enter) { - if (enter) { - } else { - n.expand(); - } - return true; -} - -bool dce_cleanup::visit(cf_node& n, bool enter) { - if (enter) { - if (n.flags & NF_DEAD) - n.remove(); - else - cleanup_dst(n); - } else { - if ((sh.dce_flags & DF_EXPAND) && - (n.bc.op_ptr->flags & (CF_CLAUSE | CF_BRANCH | CF_LOOP))) - n.expand(); - } - return true; -} - -bool dce_cleanup::visit(alu_node& n, bool enter) { - if (enter) { - } else { - if (n.flags & NF_DEAD) - n.remove(); - else - cleanup_dst(n); - } - return true; -} - -bool dce_cleanup::visit(alu_packed_node& n, bool enter) { - if (enter) { - } else { - if (n.flags & NF_DEAD) - n.remove(); - else - cleanup_dst(n); - } - return false; -} - -bool dce_cleanup::visit(fetch_node& n, bool enter) { - if (enter) { - } else { - if (n.flags & NF_DEAD) - n.remove(); - else - cleanup_dst(n); - } - return true; -} - -bool dce_cleanup::visit(region_node& n, bool enter) { - if (enter) { - if (n.loop_phi) - run_on(*n.loop_phi); - } else { - if (n.phi) - run_on(*n.phi); - } - return true; -} - -void dce_cleanup::cleanup_dst(node& n) { - if (!cleanup_dst_vec(n.dst) && remove_unused && - !n.dst.empty() && !(n.flags & NF_DONT_KILL) && n.parent) - { - // Delete use references to the removed node from the src values. 
- for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) { - value* v = *I; - if (v && v->def && v->uses.size()) - { - v->remove_use(&n); - } - } - n.remove(); - nodes_changed = true; - } -} - -bool dce_cleanup::visit(container_node& n, bool enter) { - if (enter) - cleanup_dst(n); - return true; -} - -bool dce_cleanup::cleanup_dst_vec(vvec& vv) { - bool alive = false; - - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value* &v = *I; - if (!v) - continue; - - if (v->gvn_source && v->gvn_source->is_dead()) - v->gvn_source = NULL; - - if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses.size())) - v = NULL; - else - alive = true; - } - - return alive; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_def_use.cpp b/src/gallium/drivers/r600/sb/sb_def_use.cpp deleted file mode 100644 index 68ab4ca..0000000 --- a/src/gallium/drivers/r600/sb/sb_def_use.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -int def_use::run() { - run_on(sh.root, true); - run_on(sh.root, false); - return 0; -} - -void def_use::process_phi(container_node *c, bool defs, bool uses) { - - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - node *n = *I; - if (uses) - process_uses(n); - if (defs) - process_defs(n, n->dst, false); - } -} - -void def_use::run_on(node* n, bool defs) { - - bool is_region = (n->type == NT_REGION); - bool is_op = (n->type == NT_OP || n->type == NT_IF); - - if (is_op) { - - if (0) { - sblog << "def_use processing op "; - dump::dump_op(n); - sblog << "\n"; - } - - if (defs) - process_defs(n, n->dst, false); - else - process_uses(n); - } else if (is_region & defs) { - region_node *r = static_cast(n); - if (r->loop_phi) - process_phi(r->loop_phi, true, false); - } - - if (n->is_container() && n->subtype != NST_ALU_PACKED_INST) { - container_node *c = static_cast(n); - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - run_on(*I, defs); - } - } - - if (is_region) { - region_node *r = static_cast(n); - if (r->phi) - process_phi(r->phi, defs, !defs); - if (r->loop_phi && !defs) - process_phi(r->loop_phi, false, true); - } -} - -void def_use::process_defs(node *n, vvec &vv, bool arr_def) { - - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v) - continue; - - if (arr_def) - v->adef = n; - else - v->def = n; - - v->delete_uses(); - - if (v->is_rel()) { - process_defs(n, v->mdef, true); - } - } -} - -void def_use::process_uses(node* n) { - for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) { - value 
*v = *I; - if (!v || v->is_readonly()) - continue; - - if (v->is_rel()) { - if (!v->rel->is_readonly()) - v->rel->add_use(n); - - for (vvec::iterator I = v->muse.begin(), E = v->muse.end(); - I != E; ++I) { - value *v = *I; - if (!v) - continue; - - v->add_use(n); - } - } else - v->add_use(n); - } - - for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) { - value *v = *I; - if (!v || !v->is_rel()) - continue; - - if (!v->rel->is_readonly()) - v->rel->add_use(n); - for (vvec::iterator I = v->muse.begin(), E = v->muse.end(); - I != E; ++I) { - value *v = *I; - if (!v) - continue; - - v->add_use(n); - } - } - - if (n->pred) - n->pred->add_use(n); - - if (n->type == NT_IF) { - if_node *i = static_cast(n); - if (i->cond) - i->cond->add_use(i); - } -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_dump.cpp b/src/gallium/drivers/r600/sb/sb_dump.cpp deleted file mode 100644 index bf08be9..0000000 --- a/src/gallium/drivers/r600/sb/sb_dump.cpp +++ /dev/null @@ -1,528 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -bool dump::visit(node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - - switch (n.subtype) { - case NST_PHI: - dump_op(n, "* phi"); - break; - case NST_PSI: - dump_op(n, "* psi"); - break; - case NST_COPY: - dump_op(n, "* copy"); - break; - default: - assert(!"invalid node subtype"); - break; - } - sblog << "\n"; - } - return false; -} - -bool dump::visit(container_node& n, bool enter) { - if (enter) { - if (!n.empty()) { - indent(); - dump_flags(n); - sblog << "{ "; - if (!n.dst.empty()) { - sblog << " preloaded inputs ["; - dump_vec(n.dst); - sblog << "] "; - } - dump_live_values(n, true); - } - ++level; - } else { - --level; - if (!n.empty()) { - indent(); - sblog << "} "; - if (!n.src.empty()) { - sblog << " results ["; - dump_vec(n.src); - sblog << "] "; - } - dump_live_values(n, false); - } - } - return true; -} - -bool dump::visit(bb_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - sblog << "{ BB_" << n.id << " loop_level = " << n.loop_level << " "; - dump_live_values(n, true); - ++level; - } else { - --level; - indent(); - sblog << "} end BB_" << n.id << " "; - dump_live_values(n, false); - } - return true; -} - -bool dump::visit(alu_group_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - sblog << "[ "; - dump_live_values(n, true); - - ++level; - } else { - --level; - - indent(); - sblog << "] "; - dump_live_values(n, false); - } - return true; -} - -bool dump::visit(cf_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - dump_op(n, n.bc.op_ptr->name); - - if (n.bc.op_ptr->flags & CF_BRANCH) { - sblog 
<< " @" << (n.bc.addr << 1); - } - - dump_common(n); - sblog << "\n"; - - if (!n.empty()) { - indent(); - sblog << "< "; - dump_live_values(n, true); - } - - ++level; - } else { - --level; - if (!n.empty()) { - indent(); - sblog << "> "; - dump_live_values(n, false); - } - } - return true; -} - -bool dump::visit(alu_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - dump_alu(&n); - dump_common(n); - sblog << "\n"; - - ++level; - } else { - --level; - - } - return true; -} - -bool dump::visit(alu_packed_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - dump_op(n, n.op_ptr()->name); - sblog << " "; - dump_live_values(n, true); - - ++level; - } else { - --level; - if (!n.live_after.empty()) { - indent(); - dump_live_values(n, false); - } - - } - // process children only if their src/dst aren't moved to this node yet - return n.src.empty(); -} - -bool dump::visit(fetch_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - dump_op(n, n.bc.op_ptr->name); - sblog << "\n"; - - ++level; - } else { - --level; - } - return true; -} - -bool dump::visit(region_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - sblog << "region #" << n.region_id << " "; - dump_common(n); - - if (!n.vars_defined.empty()) { - sblog << "vars_defined: "; - dump_set(sh, n.vars_defined); - } - - dump_live_values(n, true); - - ++level; - - if (n.loop_phi) - run_on(*n.loop_phi); - } else { - --level; - - if (n.phi) - run_on(*n.phi); - - indent(); - dump_live_values(n, false); - } - return true; -} - -bool dump::visit(repeat_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - sblog << "repeat region #" << n.target->region_id; - sblog << (n.empty() ? 
" " : " after { "); - dump_common(n); - sblog << " "; - dump_live_values(n, true); - - ++level; - } else { - --level; - - if (!n.empty()) { - indent(); - sblog << "} end_repeat "; - dump_live_values(n, false); - } - } - return true; -} - -bool dump::visit(depart_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - sblog << "depart region #" << n.target->region_id; - sblog << (n.empty() ? " " : " after { "); - dump_common(n); - sblog << " "; - dump_live_values(n, true); - - ++level; - } else { - --level; - if (!n.empty()) { - indent(); - sblog << "} end_depart "; - dump_live_values(n, false); - } - } - return true; -} - -bool dump::visit(if_node& n, bool enter) { - if (enter) { - indent(); - dump_flags(n); - sblog << "if " << *n.cond << " "; - dump_common(n); - sblog << " "; - dump_live_values(n, true); - - indent(); - sblog <<"{\n"; - - ++level; - } else { - --level; - indent(); - sblog << "} endif "; - dump_live_values(n, false); - } - return true; -} - -void dump::indent() { - sblog.print_wl("", level * 4); -} - -void dump::dump_vec(const vvec & vv) { - bool first = true; - for(vvec::const_iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!first) - sblog << ", "; - else - first = false; - - if (v) { - sblog << *v; - } else { - sblog << "__"; - } - } -} - -void dump::dump_rels(vvec & vv) { - for(vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - - if (!v || !v->is_rel()) - continue; - - sblog << "\n\t\t\t\t\t"; - sblog << " rels: " << *v << " : "; - dump_vec(v->mdef); - sblog << " <= "; - dump_vec(v->muse); - } -} - -void dump::dump_op(node &n, const char *name) { - - if (n.pred) { - alu_node &a = static_cast(n); - sblog << (a.bc.pred_sel-2) << " [" << *a.pred << "] "; - } - - sblog << name; - - bool has_dst = !n.dst.empty(); - - if (n.subtype == NST_CF_INST) { - cf_node *c = static_cast(&n); - if (c->bc.op_ptr->flags & CF_EXP) { - static const char *exp_type[] = {"PIXEL", "POS ", "PARAM"}; - 
sblog << " " << exp_type[c->bc.type] << " " << c->bc.array_base; - has_dst = false; - } else if (c->bc.op_ptr->flags & (CF_MEM)) { - static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", - "WRITE_IND_ACK"}; - sblog << " " << exp_type[c->bc.type] << " " << c->bc.array_base - << " ES:" << c->bc.elem_size; - if (!(c->bc.op_ptr->flags & CF_EMIT)) { - has_dst = false; - } - } - } - - sblog << " "; - - if (has_dst) { - dump_vec(n.dst); - sblog << ", "; - } - - if (n.subtype == NST_FETCH_INST) { - fetch_node *f = static_cast(&n); - if (f->bc.indexed) - dump_vec(n.src); - } else - dump_vec(n.src); -} - -void dump::dump_set(shader &sh, val_set& v) { - sblog << "["; - for(val_set::iterator I = v.begin(sh), E = v.end(sh); I != E; ++I) { - value *val = *I; - sblog << *val << " "; - } - sblog << "]"; -} - -void dump::dump_common(node& n) { -} - -void dump::dump_flags(node &n) { - if (n.flags & NF_DEAD) - sblog << "### DEAD "; - if (n.flags & NF_REG_CONSTRAINT) - sblog << "R_CONS "; - if (n.flags & NF_CHAN_CONSTRAINT) - sblog << "CH_CONS "; - if (n.flags & NF_ALU_4SLOT) - sblog << "4S "; - if (n.flags & NF_ALU_2SLOT) - sblog << "2S "; -} - -void dump::dump_val(value* v) { - sblog << *v; -} - -void dump::dump_alu(alu_node *n) { - - if (n->is_copy_mov()) - sblog << "(copy) "; - - if (n->pred) { - sblog << (n->bc.pred_sel-2) << " [" << *n->pred << "] "; - } - - sblog << n->bc.op_ptr->name; - - if (n->bc.omod) { - static const char *omod_str[] = {"", "*2", "*4", "/2"}; - sblog << omod_str[n->bc.omod]; - } - - if (n->bc.clamp) { - sblog << "_sat"; - } - - bool has_dst = !n->dst.empty(); - - sblog << " "; - - if (has_dst) { - dump_vec(n->dst); - sblog << ", "; - } - - unsigned s = 0; - for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; - ++I, ++s) { - - bc_alu_src &src = n->bc.src[s]; - - if (src.neg) - sblog << "-"; - - if (src.abs) - sblog << "|"; - - dump_val(*I); - - if (src.abs) - sblog << "|"; - - if (I + 1 != E) - sblog << ", "; - } - - 
dump_rels(n->dst); - dump_rels(n->src); - -} - -void dump::dump_op(node* n) { - if (n->type == NT_IF) { - dump_op(*n, "IF "); - return; - } - - switch(n->subtype) { - case NST_ALU_INST: - dump_alu(static_cast(n)); - break; - case NST_FETCH_INST: - dump_op(*n, static_cast(n)->bc.op_ptr->name); - break; - case NST_CF_INST: - case NST_ALU_CLAUSE: - case NST_TEX_CLAUSE: - case NST_VTX_CLAUSE: - case NST_GDS_CLAUSE: - dump_op(*n, static_cast(n)->bc.op_ptr->name); - break; - case NST_ALU_PACKED_INST: - dump_op(*n, static_cast(n)->op_ptr()->name); - break; - case NST_PHI: - dump_op(*n, "PHI"); - break; - case NST_PSI: - dump_op(*n, "PSI"); - break; - case NST_COPY: - dump_op(*n, "COPY"); - break; - default: - dump_op(*n, "??unknown_op"); - } -} - -void dump::dump_op_list(container_node* c) { - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - dump_op(*I); - sblog << "\n"; - } -} - -void dump::dump_queue(sched_queue& q) { - for (sched_queue::iterator I = q.begin(), E = q.end(); I != E; ++I) { - dump_op(*I); - sblog << "\n"; - } -} - -void dump::dump_live_values(container_node &n, bool before) { - if (before) { - if (!n.live_before.empty()) { - sblog << "live_before: "; - dump_set(sh, n.live_before); - } - } else { - if (!n.live_after.empty()) { - sblog << "live_after: "; - dump_set(sh, n.live_after); - } - } - sblog << "\n"; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp deleted file mode 100644 index cabe6a1..0000000 --- a/src/gallium/drivers/r600/sb/sb_expr.cpp +++ /dev/null @@ -1,1326 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, 
and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include - -#include "sb_shader.h" - -namespace r600_sb { - -value* get_select_value_for_em(shader& sh, value* em) { - if (!em->def) - return NULL; - - node *predset = em->def; - if (!predset->is_pred_set()) - return NULL; - - alu_node *s = sh.clone(static_cast(predset)); - convert_predset_to_set(sh, s); - - predset->insert_after(s); - - value* &d0 = s->dst[0]; - d0 = sh.create_temp_value(); - d0->def = s; - return d0; -} - -void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) { - n.src.resize(1); - n.src[0] = src; - n.bc.src[0].abs = abs; - n.bc.src[0].neg = neg; - n.bc.set_op(ALU_OP1_MOV); -} - -expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {} - -value * expr_handler::get_const(const literal &l) { - value *v = sh.get_const_value(l); - if (!v->gvn_source) - vt.add_value(v); - return v; -} - -void expr_handler::assign_source(value *dst, value *src) { - dst->gvn_source = src->gvn_source; -} - -bool expr_handler::equal(value *l, value *r) { - - assert(l != r); - - if (l->is_lds_access() || r->is_lds_access()) - return false; - if (l->gvalue() == r->gvalue()) - return true; - - if (l->def && r->def) - return defs_equal(l, r); - - if 
(l->is_rel() && r->is_rel()) - return ivars_equal(l, r); - - return false; -} - -bool expr_handler::ivars_equal(value* l, value* r) { - if (l->rel->gvalue() == r->rel->gvalue() - && l->select == r->select) { - - vvec &lv = l->mdef.empty() ? l->muse : l->mdef; - vvec &rv = r->mdef.empty() ? r->muse : r->mdef; - - // FIXME: replace this with more precise aliasing test - return lv == rv; - } - return false; -} - -bool expr_handler::defs_equal(value* l, value* r) { - - node *d1 = l->def; - node *d2 = r->def; - - if (d1->type != d2->type || d1->subtype != d2->subtype) - return false; - - if (d1->is_pred_set() || d2->is_pred_set()) - return false; - - if (d1->type == NT_OP) { - switch (d1->subtype) { - case NST_ALU_INST: - return ops_equal( - static_cast(d1), - static_cast(d2)); -// case NST_FETCH_INST: return ops_equal(static_cast(d1), -// static_cast(d2); -// case NST_CF_INST: return ops_equal(static_cast(d1), -// static_cast(d2); - default: - break; - } - } - return false; -} - -bool expr_handler::try_fold(value* v) { - assert(!v->gvn_source); - - if (v->def) - try_fold(v->def); - - if (v->gvn_source) - return true; - - return false; -} - -bool expr_handler::try_fold(node* n) { - return n->fold_dispatch(this); -} - -bool expr_handler::fold(node& n) { - if (n.subtype == NST_PHI) { - - value *s = n.src[0]; - - // FIXME disabling phi folding for registers for now, otherwise we lose - // control flow information in some cases - // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test) - // probably control flow transformation is required to enable it - if (s->is_sgpr()) - return false; - - for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) { - value *v = *I; - if (!s->v_equal(v)) - return false; - } - - assign_source(n.dst[0], s); - } else { - assert(n.subtype == NST_PSI); - assert(n.src.size() >= 6); - - value *s = n.src[2]; - assert(s->gvn_source); - - for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) { - value *v = 
*(I+2); - if (!s->v_equal(v)) - return false; - } - assign_source(n.dst[0], s); - } - return true; -} - -bool expr_handler::fold(container_node& n) { - return false; -} - -bool expr_handler::fold_setcc(alu_node &n) { - - value* v0 = n.src[0]->gvalue(); - value* v1 = n.src[1]->gvalue(); - - assert(v0 && v1 && n.dst[0]); - - unsigned flags = n.bc.op_ptr->flags; - unsigned cc = flags & AF_CC_MASK; - unsigned cmp_type = flags & AF_CMP_TYPE_MASK; - unsigned dst_type = flags & AF_DST_TYPE_MASK; - - bool cond_result; - bool have_result = false; - - bool isc0 = v0->is_const(); - bool isc1 = v1->is_const(); - - literal dv, cv0, cv1; - - if (isc0) { - cv0 = v0->get_const_value(); - apply_alu_src_mod(n.bc, 0, cv0); - } - - if (isc1) { - cv1 = v1->get_const_value(); - apply_alu_src_mod(n.bc, 1, cv1); - } - - if (isc0 && isc1) { - cond_result = evaluate_condition(flags, cv0, cv1); - have_result = true; - } else if (isc1) { - if (cmp_type == AF_FLOAT_CMP) { - if (n.bc.src[0].abs && !n.bc.src[0].neg) { - if (cv1.f < 0.0f && cc == AF_CC_NE) { - cond_result = true; - have_result = true; - } - } else if (n.bc.src[0].abs && n.bc.src[0].neg) { - if (cv1.f > 0.0f && cc == AF_CC_E) { - cond_result = false; - have_result = true; - } - } - } else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) { - cond_result = true; - have_result = true; - } - } else if (isc0) { - if (cmp_type == AF_FLOAT_CMP) { - if (n.bc.src[1].abs && !n.bc.src[1].neg) { - if (cv0.f < 0.0f && (cc == AF_CC_E)) { - cond_result = false; - have_result = true; - } - } else if (n.bc.src[1].abs && n.bc.src[1].neg) { - if (cv0.f > 0.0f && cc == AF_CC_NE) { - cond_result = true; - have_result = true; - } - } - } else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) { - cond_result = false; - have_result = true; - } - } else if (v0 == v1) { - bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1]; - if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) { - // NOTE can't handle float comparisons 
here because of NaNs - cond_result = (cc == AF_CC_E || cc == AF_CC_GE); - have_result = true; - } - } - - if (have_result) { - literal result; - - if (cond_result) - result = dst_type != AF_FLOAT_DST ? - literal(0xFFFFFFFFu) : literal(1.0f); - else - result = literal(0); - - convert_to_mov(n, sh.get_const_value(result)); - return fold_alu_op1(n); - } - - return false; -} - -bool expr_handler::fold(alu_node& n) { - - switch (n.bc.op_ptr->src_count) { - case 1: return fold_alu_op1(n); - case 2: return fold_alu_op2(n); - case 3: return fold_alu_op3(n); - default: - assert(0); - } - return false; -} - -bool expr_handler::fold(fetch_node& n) { - - unsigned chan = 0; - for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) { - value* &v = *I; - if (v) { - if (n.bc.dst_sel[chan] == SEL_0) - assign_source(*I, get_const(0.0f)); - else if (n.bc.dst_sel[chan] == SEL_1) - assign_source(*I, get_const(1.0f)); - } - ++chan; - } - return false; -} - -bool expr_handler::fold(cf_node& n) { - return false; -} - -void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src, - literal &v) { - const bc_alu_src &s = bc.src[src]; - - if (s.abs) - v = fabsf(v.f); - if (s.neg) - v = -v.f; -} - -void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) { - const float omod_coeff[] = {2.0f, 4.0, 0.5f}; - - if (bc.omod) - v = v.f * omod_coeff[bc.omod - 1]; - if (bc.clamp) - v = float_clamp(v.f); -} - -bool expr_handler::args_equal(const vvec &l, const vvec &r) { - - assert(l.size() == r.size()); - - int s = l.size(); - - for (int k = 0; k < s; ++k) { - if (!l[k]->v_equal(r[k])) - return false; - } - - return true; -} - -bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) { - const bc_alu &b0 = l->bc; - const bc_alu &b1 = r->bc; - - if (b0.op != b1.op) - return false; - - unsigned src_count = b0.op_ptr->src_count; - - if (b0.index_mode != b1.index_mode) - return false; - - if (b0.clamp != b1.clamp || b0.omod != b1.omod) - return false; - - for 
(unsigned s = 0; s < src_count; ++s) { - const bc_alu_src &s0 = b0.src[s]; - const bc_alu_src &s1 = b1.src[s]; - - if (s0.abs != s1.abs || s0.neg != s1.neg) - return false; - } - return args_equal(l->src, r->src); -} - -bool expr_handler::fold_alu_op1(alu_node& n) { - - assert(!n.src.empty()); - if (n.src.empty()) - return false; - - /* don't fold LDS instructions */ - if (n.bc.op_ptr->flags & AF_LDS) - return false; - - value* v0 = n.src[0]->gvalue(); - - if (v0->is_lds_oq() || v0->is_lds_access()) - return false; - assert(v0 && n.dst[0]); - - if (!v0->is_const()) { - // handle (MOV -(MOV -x)) => (MOV x) - if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs - && v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) { - alu_node *sd = static_cast(v0->def); - if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs && - sd->bc.src[0].neg) { - n.src[0] = sd->src[0]; - n.bc.src[0].neg = 0; - v0 = n.src[0]->gvalue(); - } - } - - if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT || - n.bc.op == ALU_OP1_MOVA_GPR_INT) - && n.bc.clamp == 0 && n.bc.omod == 0 - && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 && - n.src.size() == 1 /* RIM/SIM can be appended as additional values */ - && n.dst[0]->no_reladdr_conflict_with(v0)) { - assign_source(n.dst[0], v0); - return true; - } - return false; - } - - literal dv, cv = v0->get_const_value(); - apply_alu_src_mod(n.bc, 0, cv); - - switch (n.bc.op) { - case ALU_OP1_CEIL: dv = ceilf(cv.f); break; - case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break; - case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break; - case ALU_OP1_FLOOR: dv = floorf(cv.f); break; - case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ???? 
- case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break; - case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break; - case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break; - case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break; - case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break; - case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break; - case ALU_OP1_LOG_CLAMPED: - case ALU_OP1_LOG_IEEE: - if (cv.f != 0.0f) - dv = log2f(cv.f); - else - // don't fold to NAN, let the GPU handle it for now - // (prevents degenerate LIT tests from failing) - return false; - break; - case ALU_OP1_MOV: dv = cv; break; - case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ??? -// case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break; -// case ALU_OP1_MOVA_GPR_INT: - case ALU_OP1_NOT_INT: dv = ~cv.i; break; - case ALU_OP1_PRED_SET_INV: - dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break; - case ALU_OP1_PRED_SET_RESTORE: dv = cv; break; - case ALU_OP1_RECIPSQRT_CLAMPED: - case ALU_OP1_RECIPSQRT_FF: - case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break; - case ALU_OP1_RECIP_CLAMPED: - case ALU_OP1_RECIP_FF: - case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break; -// case ALU_OP1_RECIP_INT: - case ALU_OP1_RECIP_UINT: { - if (!cv.u) - return false; - dv.u = (1ull << 32) / cv.u; - break; - } - // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break; - case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break; - case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break; - case ALU_OP1_TRUNC: dv = truncf(cv.f); break; - - default: - return false; - } - - apply_alu_dst_mod(n.bc, dv); - assign_source(n.dst[0], get_const(dv)); - return true; -} - -bool expr_handler::fold_mul_add(alu_node *n) { - - bool ieee; - value* v0 = n->src[0]->gvalue(); - - alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ? 
- static_cast(v0->def) : NULL; - - if (d0) { - if (d0->is_alu_op(ALU_OP2_MUL_IEEE)) - ieee = true; - else if (d0->is_alu_op(ALU_OP2_MUL)) - ieee = false; - else - return false; - - if (!d0->bc.src[0].abs && !d0->bc.src[1].abs && - !n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod && - !d0->bc.clamp && !n->bc.omod && - (!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() || - !n->src[1]->is_kcache())) { - - bool mul_neg = n->bc.src[0].neg; - - n->src.resize(3); - n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD); - n->src[2] = n->src[1]; - n->bc.src[2] = n->bc.src[1]; - n->src[0] = d0->src[0]; - n->bc.src[0] = d0->bc.src[0]; - n->src[1] = d0->src[1]; - n->bc.src[1] = d0->bc.src[1]; - - n->bc.src[0].neg ^= mul_neg; - - fold_alu_op3(*n); - return true; - } - } - - value* v1 = n->src[1]->gvalue(); - - alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ? - static_cast(v1->def) : NULL; - - if (d1) { - if (d1->is_alu_op(ALU_OP2_MUL_IEEE)) - ieee = true; - else if (d1->is_alu_op(ALU_OP2_MUL)) - ieee = false; - else - return false; - - if (!d1->bc.src[1].abs && !d1->bc.src[0].abs && - !n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod && - !d1->bc.clamp && !n->bc.omod && - (!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() || - !n->src[0]->is_kcache())) { - - bool mul_neg = n->bc.src[1].neg; - - n->src.resize(3); - n->bc.set_op(ieee ? 
ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD); - n->src[2] = n->src[0]; - n->bc.src[2] = n->bc.src[0]; - n->src[1] = d1->src[1]; - n->bc.src[1] = d1->bc.src[1]; - n->src[0] = d1->src[0]; - n->bc.src[0] = d1->bc.src[0]; - - n->bc.src[1].neg ^= mul_neg; - - fold_alu_op3(*n); - return true; - } - } - - return false; -} - -bool expr_handler::eval_const_op(unsigned op, literal &r, - literal cv0, literal cv1) { - - switch (op) { - case ALU_OP2_ADD: r = cv0.f + cv1.f; break; - case ALU_OP2_ADDC_UINT: - r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break; - case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break; - case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break; - case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break; - case ALU_OP2_BFM_INT: - r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break; - case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break; - case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break; - case ALU_OP2_MAX: - case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break; - case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break; - case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break; - case ALU_OP2_MIN: - case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break; - case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break; - case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? 
cv0.u : cv1.u; break; - case ALU_OP2_MUL: - case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break; - case ALU_OP2_MULHI_INT: - r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break; - case ALU_OP2_MULHI_UINT: - r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break; - case ALU_OP2_MULLO_INT: - r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break; - case ALU_OP2_MULLO_UINT: - r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break; - case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break; - case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break; - case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break; - - default: - return false; - } - - return true; -} - -// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5) -bool expr_handler::fold_assoc(alu_node *n) { - - alu_node *a = n; - literal cr; - - int last_arg = -3; - - unsigned op = n->bc.op; - bool allow_neg = false, cur_neg = false; - bool distribute_neg = false; - - switch(op) { - case ALU_OP2_ADD: - distribute_neg = true; - allow_neg = true; - break; - case ALU_OP2_MUL: - case ALU_OP2_MUL_IEEE: - allow_neg = true; - break; - case ALU_OP3_MULADD: - allow_neg = true; - op = ALU_OP2_MUL; - break; - case ALU_OP3_MULADD_IEEE: - allow_neg = true; - op = ALU_OP2_MUL_IEEE; - break; - default: - if (n->bc.op_ptr->src_count != 2) - return false; - } - - // check if we can evaluate the op - if (!eval_const_op(op, cr, literal(0), literal(0))) - return false; - - while (true) { - - value *v0 = a->src[0]->gvalue(); - value *v1 = a->src[1]->gvalue(); - - last_arg = -2; - - if (v1->is_const()) { - literal arg = v1->get_const_value(); - apply_alu_src_mod(a->bc, 1, arg); - if (cur_neg && distribute_neg) - arg.f = -arg.f; - - if (a == n) - cr = arg; - else - eval_const_op(op, cr, cr, arg); - - if (v0->def) { - alu_node *d0 = static_cast(v0->def); - if ((d0->is_alu_op(op) || - (op == ALU_OP2_MUL_IEEE && - d0->is_alu_op(ALU_OP2_MUL))) && - !d0->bc.omod && !d0->bc.clamp && - !a->bc.src[0].abs && - (!a->bc.src[0].neg || allow_neg)) { - 
cur_neg ^= a->bc.src[0].neg; - a = d0; - continue; - } - } - last_arg = 0; - - } - - if (v0->is_const()) { - literal arg = v0->get_const_value(); - apply_alu_src_mod(a->bc, 0, arg); - if (cur_neg && distribute_neg) - arg.f = -arg.f; - - if (last_arg == 0) { - eval_const_op(op, cr, cr, arg); - last_arg = -1; - break; - } - - if (a == n) - cr = arg; - else - eval_const_op(op, cr, cr, arg); - - if (v1->def) { - alu_node *d1 = static_cast(v1->def); - if ((d1->is_alu_op(op) || - (op == ALU_OP2_MUL_IEEE && - d1->is_alu_op(ALU_OP2_MUL))) && - !d1->bc.omod && !d1->bc.clamp && - !a->bc.src[1].abs && - (!a->bc.src[1].neg || allow_neg)) { - cur_neg ^= a->bc.src[1].neg; - a = d1; - continue; - } - } - - last_arg = 1; - } - - break; - }; - - if (last_arg == -1) { - // result is const - apply_alu_dst_mod(n->bc, cr); - - if (n->bc.op == op) { - convert_to_mov(*n, sh.get_const_value(cr)); - fold_alu_op1(*n); - return true; - } else { // MULADD => ADD - n->src[0] = n->src[2]; - n->bc.src[0] = n->bc.src[2]; - n->src[1] = sh.get_const_value(cr); - n->bc.src[1].clear(); - - n->src.resize(2); - n->bc.set_op(ALU_OP2_ADD); - } - } else if (last_arg >= 0) { - n->src[0] = a->src[last_arg]; - n->bc.src[0] = a->bc.src[last_arg]; - n->bc.src[0].neg ^= cur_neg; - n->src[1] = sh.get_const_value(cr); - n->bc.src[1].clear(); - } - - return false; -} - -bool expr_handler::fold_alu_op2(alu_node& n) { - - if (n.src.size() < 2) - return false; - - unsigned flags = n.bc.op_ptr->flags; - - if (flags & AF_SET) { - return fold_setcc(n); - } - - if (!sh.safe_math && (flags & AF_M_ASSOC)) { - if (fold_assoc(&n)) - return true; - } - - value* v0 = n.src[0]->gvalue(); - value* v1 = n.src[1]->gvalue(); - - assert(v0 && v1); - - // handle some operations with equal args, e.g. 
x + x => x * 2 - if (v0 == v1) { - if (n.bc.src[0].neg == n.bc.src[1].neg && - n.bc.src[0].abs == n.bc.src[1].abs) { - switch (n.bc.op) { - case ALU_OP2_MIN: // (MIN x, x) => (MOV x) - case ALU_OP2_MIN_DX10: - case ALU_OP2_MAX: - case ALU_OP2_MAX_DX10: - convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs); - return fold_alu_op1(n); - case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) - if (!sh.safe_math) { - n.src[1] = sh.get_const_value(2.0f); - n.bc.src[1].clear(); - n.bc.set_op(ALU_OP2_MUL); - return fold_alu_op2(n); - } - break; - } - } - if (n.bc.src[0].neg != n.bc.src[1].neg && - n.bc.src[0].abs == n.bc.src[1].abs) { - switch (n.bc.op) { - case ALU_OP2_ADD: // (ADD x, -x) => (MOV 0) - if (!sh.safe_math) { - convert_to_mov(n, sh.get_const_value(literal(0))); - return fold_alu_op1(n); - } - break; - } - } - } - - if (n.bc.op == ALU_OP2_ADD) { - if (fold_mul_add(&n)) - return true; - } - - bool isc0 = v0->is_const(); - bool isc1 = v1->is_const(); - - if (!isc0 && !isc1) - return false; - - literal dv, cv0, cv1; - - if (isc0) { - cv0 = v0->get_const_value(); - apply_alu_src_mod(n.bc, 0, cv0); - } - - if (isc1) { - cv1 = v1->get_const_value(); - apply_alu_src_mod(n.bc, 1, cv1); - } - - if (isc0 && isc1) { - - if (!eval_const_op(n.bc.op, dv, cv0, cv1)) - return false; - - } else { // one source is const - - if (isc0 && cv0 == literal(0)) { - switch (n.bc.op) { - case ALU_OP2_ADD: - case ALU_OP2_ADD_INT: - case ALU_OP2_MAX_UINT: - case ALU_OP2_OR_INT: - case ALU_OP2_XOR_INT: - convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs); - return fold_alu_op1(n); - case ALU_OP2_AND_INT: - case ALU_OP2_ASHR_INT: - case ALU_OP2_LSHL_INT: - case ALU_OP2_LSHR_INT: - case ALU_OP2_MIN_UINT: - case ALU_OP2_MUL: - case ALU_OP2_MULHI_UINT: - case ALU_OP2_MULLO_UINT: - convert_to_mov(n, sh.get_const_value(literal(0))); - return fold_alu_op1(n); - } - } else if (isc1 && cv1 == literal(0)) { - switch (n.bc.op) { - case ALU_OP2_ADD: - case ALU_OP2_ADD_INT: - case 
ALU_OP2_ASHR_INT: - case ALU_OP2_LSHL_INT: - case ALU_OP2_LSHR_INT: - case ALU_OP2_MAX_UINT: - case ALU_OP2_OR_INT: - case ALU_OP2_SUB_INT: - case ALU_OP2_XOR_INT: - convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs); - return fold_alu_op1(n); - case ALU_OP2_AND_INT: - case ALU_OP2_MIN_UINT: - case ALU_OP2_MUL: - case ALU_OP2_MULHI_UINT: - case ALU_OP2_MULLO_UINT: - convert_to_mov(n, sh.get_const_value(literal(0))); - return fold_alu_op1(n); - } - } else if (isc0 && cv0 == literal(1.0f)) { - switch (n.bc.op) { - case ALU_OP2_MUL: - case ALU_OP2_MUL_IEEE: - convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs); - return fold_alu_op1(n); - } - } else if (isc1 && cv1 == literal(1.0f)) { - switch (n.bc.op) { - case ALU_OP2_MUL: - case ALU_OP2_MUL_IEEE: - convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs); - return fold_alu_op1(n); - } - } - - return false; - } - - apply_alu_dst_mod(n.bc, dv); - assign_source(n.dst[0], get_const(dv)); - return true; -} - -bool expr_handler::evaluate_condition(unsigned alu_cnd_flags, - literal s1, literal s2) { - - unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK; - unsigned cc = alu_cnd_flags & AF_CC_MASK; - - switch (cmp_type) { - case AF_FLOAT_CMP: { - switch (cc) { - case AF_CC_E : return s1.f == s2.f; - case AF_CC_GT: return s1.f > s2.f; - case AF_CC_GE: return s1.f >= s2.f; - case AF_CC_NE: return s1.f != s2.f; - case AF_CC_LT: return s1.f < s2.f; - case AF_CC_LE: return s1.f <= s2.f; - default: - assert(!"invalid condition code"); - return false; - } - } - case AF_INT_CMP: { - switch (cc) { - case AF_CC_E : return s1.i == s2.i; - case AF_CC_GT: return s1.i > s2.i; - case AF_CC_GE: return s1.i >= s2.i; - case AF_CC_NE: return s1.i != s2.i; - case AF_CC_LT: return s1.i < s2.i; - case AF_CC_LE: return s1.i <= s2.i; - default: - assert(!"invalid condition code"); - return false; - } - } - case AF_UINT_CMP: { - switch (cc) { - case AF_CC_E : return s1.u == s2.u; - case AF_CC_GT: return s1.u > s2.u; - 
case AF_CC_GE: return s1.u >= s2.u; - case AF_CC_NE: return s1.u != s2.u; - case AF_CC_LT: return s1.u < s2.u; - case AF_CC_LE: return s1.u <= s2.u; - default: - assert(!"invalid condition code"); - return false; - } - } - default: - assert(!"invalid cmp_type"); - return false; - } -} - -bool expr_handler::fold_alu_op3(alu_node& n) { - - if (n.src.size() < 3) - return false; - - if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) { - if (fold_assoc(&n)) - return true; - if (n.src.size() < 3) - return fold_alu_op2(n); - } - - value* v0 = n.src[0]->gvalue(); - value* v1 = n.src[1]->gvalue(); - value* v2 = n.src[2]->gvalue(); - - /* LDS instructions look like op3 with no dst - don't fold. */ - if (!n.dst[0]) - return false; - assert(v0 && v1 && v2 && n.dst[0]); - - bool isc0 = v0->is_const(); - bool isc1 = v1->is_const(); - bool isc2 = v2->is_const(); - - literal dv, cv0, cv1, cv2; - - if (isc0) { - cv0 = v0->get_const_value(); - apply_alu_src_mod(n.bc, 0, cv0); - } - - if (isc1) { - cv1 = v1->get_const_value(); - apply_alu_src_mod(n.bc, 1, cv1); - } - - if (isc2) { - cv2 = v2->get_const_value(); - apply_alu_src_mod(n.bc, 2, cv2); - } - - unsigned flags = n.bc.op_ptr->flags; - - if (flags & AF_CMOV) { - int src = 0; - - if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) { - // result doesn't depend on condition, convert to MOV - src = 1; - } else if (isc0) { - // src0 is const, condition can be evaluated, convert to MOV - bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK | - AF_CMP_TYPE_MASK), cv0, literal(0)); - src = cond ? 1 : 2; - } - - if (src) { - // if src is selected, convert to MOV - convert_to_mov(n, n.src[src], n.bc.src[src].neg); - return fold_alu_op1(n); - } - } - - // handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b)) - if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD || - n.bc.op == ALU_OP3_MULADD_IEEE)) { - - unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ? 
- ALU_OP2_MUL_IEEE : ALU_OP2_MUL; - - if (!isc2 && v2->def && v2->def->is_alu_op(op)) { - - alu_node *md = static_cast(v2->def); - value *mv0 = md->src[0]->gvalue(); - value *mv1 = md->src[1]->gvalue(); - - int es0 = -1, es1 = -1; - - if (v0 == mv0) { - es0 = 0; - es1 = 0; - } else if (v0 == mv1) { - es0 = 0; - es1 = 1; - } else if (v1 == mv0) { - es0 = 1; - es1 = 0; - } else if (v1 == mv1) { - es0 = 1; - es1 = 1; - } - - value *va0 = es0 == 0 ? v1 : v0; - value *va1 = es1 == 0 ? mv1 : mv0; - - /* Don't fold if no equal multipliers were found. - * Also don#t fold if the operands of the to be created ADD are both - * relatively accessed with different AR values because that would - * create impossible code. - */ - if (es0 != -1 && - (!va0->is_rel() || !va1->is_rel() || - (va0->rel == va1->rel))) { - - alu_node *add = sh.create_alu(); - add->bc.set_op(ALU_OP2_ADD); - - add->dst.resize(1); - add->src.resize(2); - - value *t = sh.create_temp_value(); - t->def = add; - add->dst[0] = t; - add->src[0] = va0; - add->src[1] = va1; - add->bc.src[0] = n.bc.src[!es0]; - add->bc.src[1] = md->bc.src[!es1]; - - add->bc.src[1].neg ^= n.bc.src[2].neg ^ - (n.bc.src[es0].neg != md->bc.src[es1].neg); - - n.insert_before(add); - vt.add_value(t); - - t = t->gvalue(); - - if (es0 == 1) { - n.src[0] = n.src[1]; - n.bc.src[0] = n.bc.src[1]; - } - - n.src[1] = t; - n.bc.src[1].clear(); - - n.src.resize(2); - - n.bc.set_op(op); - return fold_alu_op2(n); - } - } - } - - if (!isc0 && !isc1 && !isc2) - return false; - - if (isc0 && isc1 && isc2) { - switch (n.bc.op) { - case ALU_OP3_MULADD_IEEE: - case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break; - - // TODO - - default: - return false; - } - } else { - if (isc0 && isc1) { - switch (n.bc.op) { - case ALU_OP3_MULADD: - case ALU_OP3_MULADD_IEEE: - dv = cv0.f * cv1.f; - n.bc.set_op(ALU_OP2_ADD); - n.src[0] = sh.get_const_value(dv); - n.bc.src[0].clear(); - n.src[1] = n.src[2]; - n.bc.src[1] = n.bc.src[2]; - n.src.resize(2); - return 
fold_alu_op2(n); - } - } - - if (n.bc.op == ALU_OP3_MULADD) { - if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) { - convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs); - return fold_alu_op1(n); - } - } - - if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) { - unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ? - ALU_OP2_MUL_IEEE : ALU_OP2_MUL; - - if (isc1 && v0 == v2) { - cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f); - n.src[1] = sh.get_const_value(cv1); - n.bc.src[1].neg = 0; - n.bc.src[1].abs = 0; - n.bc.set_op(op); - n.src.resize(2); - return fold_alu_op2(n); - } else if (isc0 && v1 == v2) { - cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f); - n.src[0] = sh.get_const_value(cv0); - n.bc.src[0].neg = 0; - n.bc.src[0].abs = 0; - n.bc.set_op(op); - n.src.resize(2); - return fold_alu_op2(n); - } - } - - return false; - } - - apply_alu_dst_mod(n.bc, dv); - assign_source(n.dst[0], get_const(dv)); - return true; -} - -unsigned invert_setcc_condition(unsigned cc, bool &swap_args) { - unsigned ncc = 0; - - switch (cc) { - case AF_CC_E: ncc = AF_CC_NE; break; - case AF_CC_NE: ncc = AF_CC_E; break; - case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break; - case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break; - default: - assert(!"unexpected condition code"); - break; - } - return ncc; -} - -unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) { - - if (int_dst && cmp_type == AF_FLOAT_CMP) { - switch (cc) { - case AF_CC_E: return ALU_OP2_SETE_DX10; - case AF_CC_NE: return ALU_OP2_SETNE_DX10; - case AF_CC_GT: return ALU_OP2_SETGT_DX10; - case AF_CC_GE: return ALU_OP2_SETGE_DX10; - } - } else { - - switch(cmp_type) { - case AF_FLOAT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP2_SETE; - case AF_CC_NE: return ALU_OP2_SETNE; - case AF_CC_GT: return ALU_OP2_SETGT; - case AF_CC_GE: return ALU_OP2_SETGE; - } - break; - } - case AF_INT_CMP: { - switch (cc) { - case AF_CC_E: return 
ALU_OP2_SETE_INT; - case AF_CC_NE: return ALU_OP2_SETNE_INT; - case AF_CC_GT: return ALU_OP2_SETGT_INT; - case AF_CC_GE: return ALU_OP2_SETGE_INT; - } - break; - } - case AF_UINT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP2_SETE_INT; - case AF_CC_NE: return ALU_OP2_SETNE_INT; - case AF_CC_GT: return ALU_OP2_SETGT_UINT; - case AF_CC_GE: return ALU_OP2_SETGE_UINT; - } - break; - } - } - } - - assert(!"unexpected cc&cmp_type combination"); - return ~0u; -} - -unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) { - - switch(cmp_type) { - case AF_FLOAT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP2_PRED_SETE; - case AF_CC_NE: return ALU_OP2_PRED_SETNE; - case AF_CC_GT: return ALU_OP2_PRED_SETGT; - case AF_CC_GE: return ALU_OP2_PRED_SETGE; - } - break; - } - case AF_INT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP2_PRED_SETE_INT; - case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT; - case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT; - case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT; - } - break; - } - case AF_UINT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP2_PRED_SETE_INT; - case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT; - case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT; - case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT; - } - break; - } - } - - assert(!"unexpected cc&cmp_type combination"); - return ~0u; -} - -unsigned get_killcc_op(unsigned cc, unsigned cmp_type) { - - switch(cmp_type) { - case AF_FLOAT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP2_KILLE; - case AF_CC_NE: return ALU_OP2_KILLNE; - case AF_CC_GT: return ALU_OP2_KILLGT; - case AF_CC_GE: return ALU_OP2_KILLGE; - } - break; - } - case AF_INT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP2_KILLE_INT; - case AF_CC_NE: return ALU_OP2_KILLNE_INT; - case AF_CC_GT: return ALU_OP2_KILLGT_INT; - case AF_CC_GE: return ALU_OP2_KILLGE_INT; - } - break; - } - case AF_UINT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP2_KILLE_INT; - case AF_CC_NE: return ALU_OP2_KILLNE_INT; - 
case AF_CC_GT: return ALU_OP2_KILLGT_UINT; - case AF_CC_GE: return ALU_OP2_KILLGE_UINT; - } - break; - } - } - - assert(!"unexpected cc&cmp_type combination"); - return ~0u; -} - -unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) { - - switch(cmp_type) { - case AF_FLOAT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP3_CNDE; - case AF_CC_GT: return ALU_OP3_CNDGT; - case AF_CC_GE: return ALU_OP3_CNDGE; - } - break; - } - case AF_INT_CMP: { - switch (cc) { - case AF_CC_E: return ALU_OP3_CNDE_INT; - case AF_CC_GT: return ALU_OP3_CNDGT_INT; - case AF_CC_GE: return ALU_OP3_CNDGE_INT; - } - break; - } - } - - assert(!"unexpected cc&cmp_type combination"); - return ~0u; -} - - -void convert_predset_to_set(shader& sh, alu_node* a) { - - unsigned flags = a->bc.op_ptr->flags; - unsigned cc = flags & AF_CC_MASK; - unsigned cmp_type = flags & AF_CMP_TYPE_MASK; - - bool swap_args = false; - - cc = invert_setcc_condition(cc, swap_args); - - unsigned newop = get_setcc_op(cc, cmp_type, true); - - a->dst.resize(1); - a->bc.set_op(newop); - - if (swap_args) { - std::swap(a->src[0], a->src[1]); - std::swap(a->bc.src[0], a->bc.src[1]); - } - - a->bc.update_exec_mask = 0; - a->bc.update_pred = 0; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_expr.h b/src/gallium/drivers/r600/sb/sb_expr.h deleted file mode 100644 index f3c7068..0000000 --- a/src/gallium/drivers/r600/sb/sb_expr.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this 
permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#ifndef SB_EXPR_H_ -#define SB_EXPR_H_ - -namespace r600_sb { - -inline float float_clamp(float v) { - return v < 0.0f ? 0.0f : (v > 1.0f ? 1.0f : v); -} - -value* get_select_value_for_em(shader &sh, value *em); - -void convert_predset_to_set(shader &sh, alu_node *a); -unsigned invert_setcc_condition(unsigned cc, bool &swap_args); -unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst); -unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type); -unsigned get_killcc_op(unsigned cc, unsigned cmp_type); -unsigned get_cndcc_op(unsigned cc, unsigned cmp_type); - -void convert_to_mov(alu_node &n, value *src, - bool neg = false, bool abs = false); - -class expr_handler { - - shader &sh; - value_table &vt; - -public: - - expr_handler(shader &sh); - - bool equal(value *l, value *r); - bool defs_equal(value *l, value *r); - bool args_equal(const vvec &l, const vvec &r); - bool ops_equal(const alu_node *l, const alu_node *r); - bool ivars_equal(value *l, value *r); - - value* get_const(const literal &l); - - bool try_fold(value *v); - bool try_fold(node *n); - - bool fold(node &n); - bool fold(container_node &n); - bool fold(alu_node &n); - bool fold(fetch_node &n); - bool fold(cf_node &n); - - bool fold_setcc(alu_node &n); - - bool fold_alu_op1(alu_node &n); - bool fold_alu_op2(alu_node &n); - bool 
fold_alu_op3(alu_node &n); - - bool fold_mul_add(alu_node *n); - bool eval_const_op(unsigned op, literal &r, literal cv0, literal cv1); - bool fold_assoc(alu_node *n); - - static void apply_alu_src_mod(const bc_alu &bc, unsigned src, literal &v); - static void apply_alu_dst_mod(const bc_alu &bc, literal &v); - - void assign_source(value *dst, value *src); - - static bool evaluate_condition(unsigned alu_cnd_flags, literal s1, - literal s2); -}; - -} // namespace r600_sb - -#endif /* SB_EXPR_H_ */ diff --git a/src/gallium/drivers/r600/sb/sb_gcm.cpp b/src/gallium/drivers/r600/sb/sb_gcm.cpp deleted file mode 100644 index 7776a10..0000000 --- a/src/gallium/drivers/r600/sb/sb_gcm.cpp +++ /dev/null @@ -1,811 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#define GCM_DEBUG 0 - -#if GCM_DEBUG -#define GCM_DUMP(a) do { a } while(0); -#else -#define GCM_DUMP(a) -#endif - -#include - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" -#include "eg_sq.h" // V_SQ_CF_INDEX_NONE - -namespace r600_sb { - -int gcm::run() { - - GCM_DUMP( sblog << "==== GCM ==== \n"; sh.dump_ir(); ); - - collect_instructions(sh.root, true); - - init_def_count(uses, pending); - - for (node_iterator N, I = pending.begin(), E = pending.end(); - I != E; I = N) { - N = I; - ++N; - node *o = *I; - - GCM_DUMP( - sblog << "pending : "; - dump::dump_op(o); - sblog << "\n"; - ); - - if (td_is_ready(o)) { - - GCM_DUMP( - sblog << " ready: "; - dump::dump_op(o); - sblog << "\n"; - ); - pending.remove_node(o); - ready.push_back(o); - } else { - } - } - - sched_early(sh.root); - - if (!pending.empty()) { - sblog << "##### gcm_sched_early_pass: unscheduled ops:\n"; - dump::dump_op(pending.front()); - } - - assert(pending.empty()); - - GCM_DUMP( sh.dump_ir(); ); - - GCM_DUMP( sblog << "\n\n ############## gcm late\n\n"; ); - - collect_instructions(sh.root, false); - - init_use_count(uses, pending); - - sched_late(sh.root); - if (!pending.empty()) { - sblog << "##### gcm_sched_late_pass: unscheduled ops:\n"; - dump::dump_op(pending.front()); - } - - assert(ucs_level == 0); - assert(pending.empty()); - - return 0; -} - - -void gcm::collect_instructions(container_node *c, bool early_pass) { - if (c->is_bb()) { - - if (early_pass) { - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - node *n = *I; - if (n->flags & NF_DONT_MOVE) { - op_info &o = op_map[n]; - o.top_bb = o.bottom_bb = static_cast(c); - } - } - } - - pending.append_from(c); - return; - } - - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - if (I->is_container()) { - collect_instructions(static_cast(*I), early_pass); - } - } -} - -void gcm::sched_early(container_node *n) { - - region_node *r = - (n->type == 
NT_REGION) ? static_cast(n) : NULL; - - if (r && r->loop_phi) { - sched_early(r->loop_phi); - } - - for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) { - if (I->type == NT_OP) { - node *op = *I; - if (op->subtype == NST_PHI) { - td_release_uses(op->dst); - } - } else if (I->is_container()) { - if (I->subtype == NST_BB) { - bb_node* bb = static_cast(*I); - td_sched_bb(bb); - } else { - sched_early(static_cast(*I)); - } - } - } - - if (r && r->phi) { - sched_early(r->phi); - } -} - -void gcm::td_schedule(bb_node *bb, node *n) { - GCM_DUMP( - sblog << "scheduling : "; - dump::dump_op(n); - sblog << "\n"; - ); - td_release_uses(n->dst); - - bb->push_back(n); - - op_map[n].top_bb = bb; - -} - -void gcm::td_sched_bb(bb_node* bb) { - GCM_DUMP( - sblog << "td scheduling BB_" << bb->id << "\n"; - ); - - while (!ready.empty()) { - for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E; - I = N) { - N = I; ++N; - td_schedule(bb, *I); - ready.erase(I); - } - } -} - -bool gcm::td_is_ready(node* n) { - return uses[n] == 0; -} - -void gcm::td_release_val(value *v) { - - GCM_DUMP( - sblog << "td checking uses: "; - dump::dump_val(v); - sblog << "\n"; - ); - - for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) { - node *op = *I; - if (op->parent != &pending) { - continue; - } - - GCM_DUMP( - sblog << "td used in "; - dump::dump_op(op); - sblog << "\n"; - ); - - assert(uses[op] > 0); - if (--uses[op] == 0) { - GCM_DUMP( - sblog << "td released : "; - dump::dump_op(op); - sblog << "\n"; - ); - - pending.remove_node(op); - ready.push_back(op); - } - } - -} - -void gcm::td_release_uses(vvec& v) { - for (vvec::iterator I = v.begin(), E = v.end(); I != E; ++I) { - value *v = *I; - if (!v) - continue; - - if (v->is_rel()) - td_release_uses(v->mdef); - else - td_release_val(v); - } -} - -void gcm::sched_late(container_node *n) { - - bool stack_pushed = false; - - if (n->is_depart()) { - depart_node *d = static_cast(n); - push_uc_stack(); - 
stack_pushed = true; - bu_release_phi_defs(d->target->phi, d->dep_id); - } else if (n->is_repeat()) { - repeat_node *r = static_cast(n); - assert(r->target->loop_phi); - push_uc_stack(); - stack_pushed = true; - bu_release_phi_defs(r->target->loop_phi, r->rep_id); - } - - for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) { - if (I->is_container()) { - if (I->subtype == NST_BB) { - bb_node* bb = static_cast(*I); - bu_sched_bb(bb); - } else { - sched_late(static_cast(*I)); - } - } - } - - if (n->type == NT_IF) { - if_node *f = static_cast(n); - if (f->cond) - pending_defs.push_back(f->cond); - } else if (n->type == NT_REGION) { - region_node *r = static_cast(n); - if (r->loop_phi) - bu_release_phi_defs(r->loop_phi, 0); - } - - if (stack_pushed) - pop_uc_stack(); - -} - -void gcm::bu_sched_bb(bb_node* bb) { - GCM_DUMP( - sblog << "bu scheduling BB_" << bb->id << "\n"; - ); - - bu_bb = bb; - - if (!pending_nodes.empty()) { - GCM_DUMP( - sblog << "pending nodes:\n"; - ); - - // TODO consider sorting the exports by array_base, - // possibly it can improve performance - - for (node_list::iterator I = pending_nodes.begin(), - E = pending_nodes.end(); I != E; ++I) { - bu_release_op(*I); - } - pending_nodes.clear(); - GCM_DUMP( - sblog << "pending nodes processed...\n"; - ); - } - - - if (!pending_defs.empty()) { - for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end(); - I != E; ++I) { - bu_release_val(*I); - } - pending_defs.clear(); - } - - for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end(); - I != E; I = N) { - N = I; - ++N; - node *n = *I; - if (op_map[n].bottom_bb == bb) { - add_ready(*I); - ready_above.erase(I); - } - } - - unsigned cnt_ready[SQ_NUM]; - - container_node *clause = NULL; - unsigned last_inst_type = ~0; - unsigned last_count = 0; - - bool s = true; - while (s) { - node *n; - - s = false; - - unsigned ready_mask = 0; - - for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) { - if (!bu_ready[sq].empty() || 
!bu_ready_next[sq].empty()) - ready_mask |= (1 << sq); - } - - if (!ready_mask) { - for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) { - if (!bu_ready_early[sq].empty()) { - node *n = bu_ready_early[sq].front(); - bu_ready_early[sq].pop_front(); - bu_ready[sq].push_back(n); - break; - } - } - } - - for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) { - - if (sq == SQ_CF && pending_exec_mask_update) { - pending_exec_mask_update = false; - sq = SQ_ALU; - --sq; - continue; - } - - if (sq != SQ_ALU && outstanding_lds_oq) - continue; - - if (!bu_ready_next[sq].empty()) - bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]); - - cnt_ready[sq] = bu_ready[sq].size(); - - if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold && - cnt_ready[sq] < ctx.max_fetch/2 && - !bu_ready_next[SQ_ALU].empty()) { - sq = SQ_ALU; - --sq; - continue; - } - - while (!bu_ready[sq].empty()) { - - if (last_inst_type != sq) { - clause = NULL; - last_count = 0; - last_inst_type = sq; - } - - // simple heuristic to limit register pressure, - if (sq == SQ_ALU && live_count > rp_threshold && !outstanding_lds_oq && - (!bu_ready[SQ_TEX].empty() || - !bu_ready[SQ_VTX].empty() || - !bu_ready_next[SQ_TEX].empty() || - !bu_ready_next[SQ_VTX].empty())) { - GCM_DUMP( sblog << "switching to fetch (regpressure)\n"; ); - break; - } - - n = bu_ready[sq].front(); - - // real count (e.g. SAMPLE_G will be expanded to 3 instructions, - // 2 SET_GRAD_ + 1 SAMPLE_G - unsigned ncnt = 1; - if (n->is_fetch_inst() && n->src.size() == 12) { - ncnt = 3; - } - - bool sampler_indexing = false; - if (n->is_fetch_inst() && - static_cast(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) - { - sampler_indexing = true; // Give sampler indexed ops get their own clause - ncnt = sh.get_ctx().is_cayman() ? 
2 : 3; // MOVA + SET_CF_IDX0/1 - } - - if ((sq == SQ_TEX || sq == SQ_VTX) && - ((last_count >= ctx.max_fetch/2 && - check_alu_ready_count(24)) || - last_count + ncnt > ctx.max_fetch)) - break; - else if (sq == SQ_CF && last_count > 4 && - check_alu_ready_count(24)) - break; - - - if (sq == SQ_ALU && n->consumes_lds_oq() && - (bu_ready[SQ_TEX].size() || bu_ready[SQ_VTX].size() || bu_ready[SQ_GDS].size())) { - GCM_DUMP( sblog << "switching scheduling due to lds op\n"; ); - break; - } - bu_ready[sq].pop_front(); - - if (sq != SQ_CF) { - if (!clause || sampler_indexing) { - node_subtype nst; - switch (sq) { - case SQ_ALU: - nst = NST_ALU_CLAUSE; - break; - case SQ_TEX: - nst = NST_TEX_CLAUSE; - break; - case SQ_GDS: - nst = NST_GDS_CLAUSE; - break; - default: - nst = NST_VTX_CLAUSE; - break; - } - clause = sh.create_clause(nst); - bb->push_front(clause); - } - } else { - clause = bb; - } - - bu_schedule(clause, n); - s = true; - last_count += ncnt; - } - } - } - - bu_bb = NULL; - - GCM_DUMP( - sblog << "bu finished scheduling BB_" << bb->id << "\n"; - ); -} - -void gcm::bu_release_defs(vvec& v, bool src) { - for (vvec::reverse_iterator I = v.rbegin(), E = v.rend(); I != E; ++I) { - value *v = *I; - if (!v || v->is_readonly()) - continue; - - if (v->is_rel()) { - if (!v->rel->is_readonly()) - bu_release_val(v->rel); - bu_release_defs(v->muse, true); - } else if (src) - bu_release_val(v); - else { - if (live.remove_val(v)) { - --live_count; - } - } - } -} - -void gcm::push_uc_stack() { - GCM_DUMP( - sblog << "pushing use count stack prev_level " << ucs_level - << " new level " << (ucs_level + 1) << "\n"; - ); - ++ucs_level; - if (ucs_level == nuc_stk.size()) { - nuc_stk.resize(ucs_level + 1); - } - else { - nuc_stk[ucs_level].clear(); - } -} - -bool gcm::bu_is_ready(node* n) { - nuc_map &cm = nuc_stk[ucs_level]; - nuc_map::iterator F = cm.find(n); - unsigned uc = (F == cm.end() ? 
0 : F->second); - return uc == uses[n]; -} - -void gcm::bu_schedule(container_node* c, node* n) { - GCM_DUMP( - sblog << "bu scheduling : "; - dump::dump_op(n); - sblog << "\n"; - ); - - assert(op_map[n].bottom_bb == bu_bb); - - if (n->produces_lds_oq()) - outstanding_lds_oq--; - if (n->consumes_lds_oq()) - outstanding_lds_oq++; - bu_release_defs(n->src, true); - bu_release_defs(n->dst, false); - - c->push_front(n); -} - -void gcm::dump_uc_stack() { - sblog << "##### uc_stk start ####\n"; - for (unsigned l = 0; l <= ucs_level; ++l) { - nuc_map &m = nuc_stk[l]; - - sblog << "nuc_stk[" << l << "] : @" << &m << "\n"; - - for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) { - sblog << " uc " << I->second << " for "; - dump::dump_op(I->first); - sblog << "\n"; - } - } - sblog << "##### uc_stk end ####\n"; -} - -void gcm::pop_uc_stack() { - nuc_map &pm = nuc_stk[ucs_level]; - --ucs_level; - nuc_map &cm = nuc_stk[ucs_level]; - - GCM_DUMP( - sblog << "merging use stack from level " << (ucs_level+1) - << " to " << ucs_level << "\n"; - ); - - for (nuc_map::iterator N, I = pm.begin(), E = pm.end(); I != E; ++I) { - node *n = I->first; - - GCM_DUMP( - sblog << " " << cm[n] << " += " << I->second << " for "; - dump::dump_op(n); - sblog << "\n"; - ); - - unsigned uc = cm[n] += I->second; - - if (n->parent == &pending && uc == uses[n]) { - cm.erase(n); - pending_nodes.push_back(n); - GCM_DUMP( - sblog << "pushed pending_node due to stack pop "; - dump::dump_op(n); - sblog << "\n"; - ); - } - } -} - -void gcm::bu_find_best_bb(node *n, op_info &oi) { - - GCM_DUMP( - sblog << " find best bb : "; - dump::dump_op(n); - sblog << "\n"; - ); - - if (oi.bottom_bb) - return; - - // don't hoist generated copies - if (n->flags & NF_DONT_HOIST) { - oi.bottom_bb = bu_bb; - return; - } - - bb_node* best_bb = bu_bb; - bb_node* top_bb = oi.top_bb; - assert(oi.top_bb && !oi.bottom_bb); - - node *c = best_bb; - - // FIXME top_bb may be located inside the loop so we'll never enter it - 
// in the loop below, and the instruction will be incorrectly placed at the - // beginning of the shader. - // For now just check if top_bb's loop_level is higher than of - // current bb and abort the search for better bb in such case, - // but this problem may require more complete (and more expensive) fix - if (top_bb->loop_level <= best_bb->loop_level) { - while (c && c != top_bb) { - - if (c->prev) { - c = c->prev; - } else { - c = c->parent; - if (!c) - break; - continue; - } - - if (c->subtype == NST_BB) { - bb_node *bb = static_cast(c); - if (bb->loop_level < best_bb->loop_level) - best_bb = bb; - } - } - } - - oi.bottom_bb = best_bb; -} - -void gcm::add_ready(node *n) { - sched_queue_id sq = sh.get_queue_id(n); - if (n->flags & NF_SCHEDULE_EARLY) - bu_ready_early[sq].push_back(n); - else if (sq == SQ_ALU && n->is_copy_mov()) - bu_ready[sq].push_front(n); - else if (n->is_alu_inst()) { - alu_node *a = static_cast(n); - if (a->bc.op_ptr->flags & AF_PRED && a->dst[2]) { - // PRED_SET instruction that updates exec mask - pending_exec_mask_update = true; - } - bu_ready_next[sq].push_back(n); - } else - bu_ready_next[sq].push_back(n); -} - -void gcm::bu_release_op(node * n) { - op_info &oi = op_map[n]; - - GCM_DUMP( - sblog << " bu release op "; - dump::dump_op(n); - ); - - nuc_stk[ucs_level].erase(n); - pending.remove_node(n); - - bu_find_best_bb(n, oi); - - if (oi.bottom_bb == bu_bb) { - GCM_DUMP( sblog << " ready\n";); - add_ready(n); - } else { - GCM_DUMP( sblog << " ready_above\n";); - ready_above.push_back(n); - } -} - -void gcm::bu_release_phi_defs(container_node* p, unsigned op) -{ - for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) { - node *o = *I; - value *v = o->src[op]; - if (v && !v->is_readonly()) - pending_defs.push_back(o->src[op]); - - } -} - -unsigned gcm::get_uc_vec(vvec &vv) { - unsigned c = 0; - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v) - continue; - - if (v->is_rel()) - c += 
get_uc_vec(v->mdef); - else - c += v->use_count(); - } - return c; -} - -void gcm::init_use_count(nuc_map& m, container_node &s) { - m.clear(); - for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) { - node *n = *I; - unsigned uc = get_uc_vec(n->dst); - GCM_DUMP( - sblog << "uc " << uc << " "; - dump::dump_op(n); - sblog << "\n"; - ); - if (!uc) { - pending_nodes.push_back(n); - GCM_DUMP( - sblog << "pushed pending_node in init "; - dump::dump_op(n); - sblog << "\n"; - ); - - } else - m[n] = uc; - } -} - -void gcm::bu_release_val(value* v) { - node *n = v->any_def(); - - if (n && n->parent == &pending) { - nuc_map &m = nuc_stk[ucs_level]; - unsigned uc = ++m[n]; - unsigned uc2 = uses[n]; - - if (live.add_val(v)) { - ++live_count; - GCM_DUMP ( sblog << "live_count: " << live_count << "\n"; ); - } - - GCM_DUMP( - sblog << "release val "; - dump::dump_val(v); - sblog << " for node "; - dump::dump_op(n); - sblog << " new uc=" << uc << ", total " << uc2 << "\n"; - ); - - if (uc == uc2) - bu_release_op(n); - } - -} - -void gcm::init_def_count(nuc_map& m, container_node& s) { - m.clear(); - for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) { - node *n = *I; - unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false); - m[n] = dc; - - GCM_DUMP( - sblog << "dc " << dc << " "; - dump::dump_op(n); - sblog << "\n"; - ); - } -} - -unsigned gcm::get_dc_vec(vvec& vv, bool src) { - unsigned c = 0; - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v || v->is_readonly()) - continue; - - if (v->is_rel()) { - c += v->rel->def != NULL; - c += get_dc_vec(v->muse, true); - } - else if (src) { - c += v->def != NULL; - c += v->adef != NULL; - } - } - return c; -} - -unsigned gcm::real_alu_count(sched_queue& q, unsigned max) { - sq_iterator I(q.begin()), E(q.end()); - unsigned c = 0; - - while (I != E && c < max) { - node *n = *I; - if (n->is_alu_inst()) { - if (!n->is_copy_mov() || !n->src[0]->is_any_gpr()) - ++c; - } else 
if (n->is_alu_packed()) { - c += static_cast(n)->count(); - } - ++I; - } - - return c; -} - -bool gcm::check_alu_ready_count(unsigned threshold) { - unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold); - if (r >= threshold) - return true; - r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r); - return r >= threshold; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_gvn.cpp b/src/gallium/drivers/r600/sb/sb_gvn.cpp deleted file mode 100644 index c994ff6..0000000 --- a/src/gallium/drivers/r600/sb/sb_gvn.cpp +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#define GVN_DEBUG 0 - -#if GVN_DEBUG -#define GVN_DUMP(q) do { q } while (0) -#else -#define GVN_DUMP(q) -#endif - -#include "sb_shader.h" -#include "sb_pass.h" -#include "sb_sched.h" - -namespace r600_sb { - -bool gvn::visit(node& n, bool enter) { - if (enter) { - - - bool rewrite = true; - - if (n.dst[0]->is_agpr()) { - rewrite = false; - } - - - process_op(n, rewrite); - - assert(n.parent); - - if (n.parent->subtype == NST_LOOP_PHI_CONTAINER) { - // There is a problem - sometimes with nested loops - // loop counter initialization for inner loop is incorrectly hoisted - // out of the outer loop - - // FIXME not sure if this is enough to fix a problem completely, - // possibly more complete fix is needed (anyway, the - // problem was seen only in relatively complex - // case involving nested loops and - // indirect access to loop counters (without proper array info - // loop counters may be considered as array elements too), - // was not seen in any tests - // or real apps when proper array information is available in TGSI). 
- - // For now just mark the instructions that initialize loop counters - // with DONT_HOIST flag to prevent the insts like MOV r, 0 - // (initialization of inner loop's counter with const) - // from being hoisted out of the outer loop - - assert(!n.src.empty()); - value *v = n.src[0]; - - if (v->is_any_gpr() && v->def) - v->def->flags |= NF_DONT_HOIST; - } - - } else { - } - return true; -} - -bool gvn::visit(cf_node& n, bool enter) { - if (enter) { - process_op(n); - } else { - } - return true; -} - -bool gvn::visit(alu_node& n, bool enter) { - if (enter) { - process_op(n); - } else { - } - return true; -} - -bool gvn::visit(alu_packed_node& n, bool enter) { - if (enter) { - process_op(n); - } else { - } - return false; -} - -bool gvn::visit(fetch_node& n, bool enter) { - if (enter) { - process_op(n); - } else { - } - return true; -} - -bool gvn::visit(region_node& n, bool enter) { - if (enter) { -// FIXME: loop_phi sources are undefined yet (except theone from the preceding -// code), can we handle that somehow? -// if (n.loop_phi) -// run_on(*n.loop_phi); - } else { - if (n.loop_phi) - run_on(*n.loop_phi); - - if (n.phi) - run_on(*n.phi); - } - return true; -} - -bool gvn::process_src(value* &v, bool rewrite) { - if (!v->gvn_source) - sh.vt.add_value(v); - - if (rewrite && !v->gvn_source->is_rel()) { - v = v->gvn_source; - return true; - } - return false; -} - -// FIXME: maybe handle it in the scheduler? 
-void gvn::process_alu_src_constants(node &n, value* &v) { - if (n.src.size() < 3) { - process_src(v, true); - return; - } - - if (!v->gvn_source) - sh.vt.add_value(v); - - rp_kcache_tracker kc(sh); - - if (v->gvn_source->is_kcache()) - kc.try_reserve(v->gvn_source->select); - - // don't propagate 3rd constant to the trans-only instruction - if (!n.is_alu_packed()) { - alu_node *a = static_cast(&n); - if (a->bc.op_ptr->src_count == 3 && !(a->bc.slot_flags & AF_V)) { - unsigned const_count = 0; - for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; - ++I) { - value *c = (*I); - if (c && c->is_readonly() && ++const_count == 2) { - process_src(v, false); - return; - } - } - } - } - - unsigned kcache_count = 0; - for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) { - value *c = (*I); - - if (c->is_kcache() && (!kc.try_reserve(c->select) || ++kcache_count == 2)) { - process_src(v, false); - return; - } - } - process_src(v, true); -} - -void gvn::process_op(node& n, bool rewrite) { - - for(vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) { - value* &v = *I; - if (v) { - if (v->rel) { - process_src(v->rel, rewrite); - } - - if (rewrite && v->gvn_source && v->gvn_source->is_readonly() && - n.is_any_alu()) { - process_alu_src_constants(n, v); - } else if (rewrite && v->gvn_source && v->gvn_source->is_const() && - (n.is_fetch_op(FETCH_OP_VFETCH) || - n.is_fetch_op(FETCH_OP_SEMFETCH))) - process_src(v, false); - else - process_src(v, rewrite); - } - } - if (n.pred) - process_src(n.pred, false); - - if (n.type == NT_IF) { - if_node &i = (if_node&)n; - if (i.cond) - process_src(i.cond, false); - } - - for(vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) { - value *v = *I; - if (v) { - if (v->rel) - process_src(v->rel, rewrite); - sh.vt.add_value(v); - } - } -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp b/src/gallium/drivers/r600/sb/sb_if_conversion.cpp deleted file mode 
100644 index 7452f1c..0000000 --- a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#define IFC_DEBUG 0 - -#if IFC_DEBUG -#define IFC_DUMP(q) do { q } while (0) -#else -#define IFC_DUMP(q) -#endif - -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -int if_conversion::run() { - - regions_vec &rv = sh.get_regions(); - - UNUSED unsigned converted = 0; - for (regions_vec::reverse_iterator I = rv.rbegin(); I != rv.rend(); ) { - region_node *r = *I; - if (run_on(r)) { - I = regions_vec::reverse_iterator(rv.erase((++I).base())); - ++converted; - } else - ++I; - } - return 0; -} - -void if_conversion::convert_kill_instructions(region_node *r, - value *em, bool branch, - container_node *c) { - value *cnd = NULL; - - for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) { - N = I + 1; - - if (!I->is_alu_inst()) - continue; - - alu_node *a = static_cast(*I); - unsigned flags = a->bc.op_ptr->flags; - - if (!(flags & AF_KILL)) - continue; - - // ignore predicated or non-const kill instructions - if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const()) - continue; - - literal l0 = a->src[0]->literal_value; - literal l1 = a->src[1]->literal_value; - - expr_handler::apply_alu_src_mod(a->bc, 0, l0); - expr_handler::apply_alu_src_mod(a->bc, 1, l1); - - if (expr_handler::evaluate_condition(flags, l0, l1)) { - // kill with constant 'true' condition, we'll convert it to the - // conditional kill outside of the if-then-else block - - a->remove(); - - if (!cnd) { - cnd = get_select_value_for_em(sh, em); - } else { - // more than one kill with the same condition, just remove it - continue; - } - - r->insert_before(a); - a->bc.set_op(branch ? 
ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT); - - a->src[0] = cnd; - a->src[1] = sh.get_const_value(0); - // clear modifiers - a->bc.src[0].clear(); - a->bc.src[1].clear(); - } else { - // kill with constant 'false' condition, this shouldn't happen - // but remove it anyway - a->remove(); - } - } -} - -bool if_conversion::check_and_convert(region_node *r) { - - depart_node *nd1 = static_cast(r->first); - if (!nd1->is_depart() || nd1->target != r) - return false; - if_node *nif = static_cast(nd1->first); - if (!nif->is_if()) - return false; - depart_node *nd2 = static_cast(nif->first); - if (!nd2->is_depart() || nd2->target != r) - return false; - - value* &em = nif->cond; - - node_stats s; - - r->collect_stats(s); - - IFC_DUMP( - sblog << "ifcvt: region " << r->region_id << " :\n"; - s.dump(); - ); - - if (s.region_count || s.fetch_count || s.alu_kill_count || - s.if_count != 1 || s.repeat_count || s.uses_ar) - return false; - - unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count; - - // if_conversion allows to eliminate JUMP-ALU_POP_AFTER or - // JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions - // are eliminated. According to the docs, cost of CF instruction is - // equal to ~40 ALU VLIW instructions (instruction groups), - // so we have eliminated cost equal to ~120 groups in total. - // Let's also assume that we have avg 3 ALU instructions per group, - // This means that potential eliminated cost is about 360 single alu inst. - // On the other hand, we are speculatively executing conditional code now, - // so we are increasing the cost in some cases. In the worst case, we'll - // have to execute real_alu_count additional alu instructions instead of - // jumping over them. 
Let's assume for now that average added cost is - // - // (0.9 * real_alu_count) - // - // So we should perform if_conversion if - // - // (0.9 * real_alu_count) < 360, or - // - // real_alu_count < 400 - // - // So if real_alu_count is more than 400, than we think that if_conversion - // doesn't make sense. - - // FIXME: We can use more precise heuristic, taking into account sizes of - // the branches and their probability instead of total size. - // Another way to improve this is to consider the number of the groups - // instead of the number of instructions (taking into account actual VLIW - // packing). - // (Currently we don't know anything about packing at this stage, but - // probably we can make some more precise estimations anyway) - - if (real_alu_count > 400) - return false; - - IFC_DUMP( sblog << "if_cvt: processing...\n"; ); - - value *select = get_select_value_for_em(sh, em); - - if (!select) - return false; - - for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) { - node *n = *I; - - alu_node *ns = convert_phi(select, n); - - if (ns) - r->insert_after(ns); - } - - nd2->expand(); - nif->expand(); - nd1->expand(); - r->expand(); - - return true; -} - -bool if_conversion::run_on(region_node* r) { - - if (r->dep_count() != 2 || r->rep_count() != 1) - return false; - - depart_node *nd1 = static_cast(r->first); - if (!nd1->is_depart()) - return false; - if_node *nif = static_cast(nd1->first); - if (!nif->is_if()) - return false; - depart_node *nd2 = static_cast(nif->first); - if (!nd2->is_depart()) - return false; - - value* &em = nif->cond; - - convert_kill_instructions(r, em, true, nd2); - convert_kill_instructions(r, em, false, nd1); - - if (check_and_convert(r)) - return true; - - if (nd2->empty() && nif->next) { - // empty true branch, non-empty false branch - // we'll invert it to get rid of 'else' - - assert(em && em->def); - - alu_node *predset = static_cast(em->def); - - // create clone of PREDSET instruction with inverted 
condition. - // PREDSET has 3 dst operands in our IR (value written to gpr, - // predicate value and exec mask value), we'll split it such that - // new PREDSET will define exec mask value only, and two others will - // be defined in the old PREDSET (if they are not used then DCE will - // simply remove old PREDSET). - - alu_node *newpredset = sh.clone(predset); - predset->insert_after(newpredset); - - predset->dst[2] = NULL; - - newpredset->dst[0] = NULL; - newpredset->dst[1] = NULL; - - em->def = newpredset; - - unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK; - unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK; - bool swapargs = false; - - cc = invert_setcc_condition(cc, swapargs); - - if (swapargs) { - std::swap(newpredset->src[0], newpredset->src[1]); - std::swap(newpredset->bc.src[0], newpredset->bc.src[1]); - } - - unsigned newopcode = get_predsetcc_op(cc, cmptype); - newpredset->bc.set_op(newopcode); - - // move the code from the 'false' branch ('else') to the 'true' branch - nd2->move(nif->next, NULL); - - // swap phi operands - for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; - ++I) { - node *p = *I; - assert(p->src.size() == 2); - std::swap(p->src[0], p->src[1]); - } - } - - return false; -} - -alu_node* if_conversion::convert_phi(value* select, node* phi) { - assert(phi->dst.size() == 1 || phi->src.size() == 2); - - value *d = phi->dst[0]; - value *v1 = phi->src[0]; - value *v2 = phi->src[1]; - - assert(d); - - if (!d->is_any_gpr()) - return NULL; - - if (v1->is_undef()) { - if (v2->is_undef()) { - return NULL; - } else { - return sh.create_mov(d, v2); - } - } else if (v2->is_undef()) - return sh.create_mov(d, v1); - - alu_node* n = sh.create_alu(); - - n->bc.set_op(ALU_OP3_CNDE_INT); - n->dst.push_back(d); - n->src.push_back(select); - n->src.push_back(v1); - n->src.push_back(v2); - - return n; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_ir.cpp 
b/src/gallium/drivers/r600/sb/sb_ir.cpp deleted file mode 100644 index fe068ab..0000000 --- a/src/gallium/drivers/r600/sb/sb_ir.cpp +++ /dev/null @@ -1,551 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -bool node::accept(vpass& p, bool enter) { return p.visit(*this, enter); } -bool container_node::accept(vpass& p, bool enter) { return p.visit(*this, enter); } -bool alu_group_node::accept(vpass& p, bool enter) { return p.visit(*this, enter); } -bool alu_node::accept(vpass& p, bool enter) { return p.visit(*this, enter); } -bool cf_node::accept(vpass& p, bool enter) { return p.visit(*this, enter); } -bool fetch_node::accept(vpass& p, bool enter) { return p.visit(*this, enter); } -bool region_node::accept(vpass& p, bool enter) { return p.visit(*this, enter); } - -bool repeat_node::accept(vpass& p, bool enter) { - return p.visit(*this, enter); -} - -bool depart_node::accept(vpass& p, bool enter) { - return p.visit(*this, enter); -} -bool if_node::accept(vpass& p, bool enter) { return p.visit(*this, enter); } -bool bb_node::accept(vpass& p, bool enter) { return p.visit(*this, enter); } -bool alu_packed_node::accept(vpass& p, bool enter) { - return p.visit(*this, enter); -} - -void alu_packed_node::init_args(bool repl) { - alu_node *p = static_cast(first); - assert(p->is_valid()); - while (p) { - dst.insert(dst.end(), p->dst.begin(), p->dst.end()); - src.insert(src.end(), p->src.begin(), p->src.end()); - p = static_cast(p->next); - } - - value *replicated_value = NULL; - - for (vvec::iterator I = dst.begin(), E = dst.end(); I != E; ++I) { - value *v = *I; - if (v) { - if (repl) { - if (replicated_value) - v->assign_source(replicated_value); - else - replicated_value = v; - } - - v->def = this; - } - } -} - -void container_node::insert_node_before(node* s, node* n) { - if (s->prev) { - node *sp = s->prev; - sp->next = n; - n->prev = sp; - n->next = s; - s->prev = n; - } else { - n->next = s; - s->prev = n; - first = n; - } - n->parent = this; -} - -void container_node::insert_node_after(node* s, node* n) { - if (s->next) { - node *sn = 
s->next; - sn->prev = n; - n->next = sn; - n->prev = s; - s->next = n; - } else { - n->prev = s; - s->next = n; - last = n; - } - n->parent = this; -} - -void container_node::move(iterator b, iterator e) { - assert(b != e); - - container_node *source_container = b->parent; - node *l = source_container->cut(b, e); - - first = last = l; - first->parent = this; - - while (last->next) { - last = last->next; - last->parent = this; - } -} - -node* container_node::cut(iterator b, iterator e) { - assert(!*b || b->parent == this); - assert(!*e || e->parent == this); - assert(b != e); - - if (b->prev) { - b->prev->next = *e; - } else { - first = *e; - } - - if (*e) { - e->prev->next = NULL; - e->prev = b->prev; - } else { - last->next = NULL; - last = b->prev; - } - - b->prev = NULL; - - return *b; -} - -unsigned container_node::count() { - unsigned c = 0; - node *t = first; - while (t) { - t = t->next; - c++; - } - return c; -} - -void container_node::remove_node(node *n) { - if (n->prev) - n->prev->next = n->next; - else - first = n->next; - if (n->next) - n->next->prev = n->prev; - else - last = n->prev; - n->parent = NULL; -} - -void container_node::expand(container_node *n) { - if (!n->empty()) { - node *e0 = n->first; - node *e1 = n->last; - - e0->prev = n->prev; - if (e0->prev) { - e0->prev->next = e0; - } else { - first = e0; - } - - e1->next = n->next; - if (e1->next) - e1->next->prev = e1; - else - last = e1; - - do { - e0->parent = this; - e0 = e0->next; - } while (e0 != e1->next); - } else - remove_node(n); -} - -void container_node::push_back(node *n) { - if (last) { - last->next = n; - n->next = NULL; - n->prev = last; - last = n; - } else { - assert(!first); - first = last = n; - n->prev = n->next = NULL; - } - n->parent = this; -} -void container_node::push_front(node *n) { - if (first) { - first->prev = n; - n->prev = NULL; - n->next = first; - first = n; - } else { - assert(!last); - first = last = n; - n->prev = n->next = NULL; - } - n->parent = this; -} - 
-void node::insert_before(node* n) { - parent->insert_node_before(this, n); -} - -void node::insert_after(node* n) { - parent->insert_node_after(this, n); -} - -void node::replace_with(node* n) { - n->prev = prev; - n->next = next; - n->parent = parent; - if (prev) - prev->next = n; - if (next) - next->prev = n; - - if (parent->first == this) - parent->first = n; - - if (parent->last == this) - parent->last = n; - - parent = NULL; - next = prev = NULL; -} - -void container_node::expand() { - parent->expand(this); -} - -void node::remove() {parent->remove_node(this); -} - -value_hash node::hash_src() const { - - value_hash h = 12345; - - for (int k = 0, e = src.size(); k < e; ++k) { - value *s = src[k]; - if (s) - h ^= (s->hash()); - } - - return h; -} - - -value_hash node::hash() const { - - if (parent && parent->subtype == NST_LOOP_PHI_CONTAINER) - return 47451; - - return hash_src() ^ (subtype << 13) ^ (type << 3); -} - -void r600_sb::container_node::append_from(container_node* c) { - if (!c->first) - return; - - node *b = c->first; - - if (last) { - last->next = c->first; - last->next->prev = last; - } else { - first = c->first; - } - - last = c->last; - c->first = NULL; - c->last = NULL; - - while (b) { - b->parent = this; - b = b->next; - } -} - -bool node::fold_dispatch(expr_handler* ex) { return ex->fold(*this); } -bool container_node::fold_dispatch(expr_handler* ex) { return ex->fold(*this); } -bool alu_node::fold_dispatch(expr_handler* ex) { return ex->fold(*this); } -bool alu_packed_node::fold_dispatch(expr_handler* ex) { return ex->fold(*this); } -bool fetch_node::fold_dispatch(expr_handler* ex) { return ex->fold(*this); } -bool cf_node::fold_dispatch(expr_handler* ex) { return ex->fold(*this); } - -unsigned alu_packed_node::get_slot_mask() { - unsigned mask = 0; - for (node_iterator I = begin(), E = end(); I != E; ++I) - mask |= 1 << static_cast(*I)->bc.slot; - return mask; -} - -void alu_packed_node::update_packed_items(sb_context &ctx) { - - 
vvec::iterator SI(src.begin()), DI(dst.begin()); - - assert(first); - - alu_node *c = static_cast(first); - unsigned flags = c->bc.op_ptr->flags; - unsigned slot_flags = c->bc.slot_flags; - - // fixup dst for instructions that replicate output - if (((flags & AF_REPL) && slot_flags == AF_4V) || - (ctx.is_cayman() && slot_flags == AF_S)) { - - value *swp[4] = {}; - - unsigned chan; - - for (vvec::iterator I2 = dst.begin(), E2 = dst.end(); - I2 != E2; ++I2) { - value *v = *I2; - if (v) { - chan = v->get_final_chan(); - assert(!swp[chan] || swp[chan] == v); - swp[chan] = v; - } - } - - chan = 0; - for (vvec::iterator I2 = dst.begin(), E2 = dst.end(); - I2 != E2; ++I2, ++chan) { - *I2 = swp[chan]; - } - } - - for (node_iterator I = begin(), E = end(); I != E; ++I) { - alu_node *n = static_cast(*I); - assert(n); - - for (vvec::iterator I2 = n->src.begin(), E2 = n->src.end(); - I2 != E2; ++I2, ++SI) { - *I2 = *SI; - } - for (vvec::iterator I2 = n->dst.begin(), E2 = n->dst.end(); - I2 != E2; ++I2, ++DI) { - *I2 = *DI; - } - } -} - -bool node::is_cf_op(unsigned op) { - if (!is_cf_inst()) - return false; - cf_node *c = static_cast(this); - return c->bc.op == op; -} - -bool node::is_alu_op(unsigned op) { - if (!is_alu_inst()) - return false; - alu_node *c = static_cast(this); - return c->bc.op == op; -} - -bool node::is_fetch_op(unsigned op) { - if (!is_fetch_inst()) - return false; - fetch_node *c = static_cast(this); - return c->bc.op == op; -} - - - -bool node::is_mova() { - if (!is_alu_inst()) - return false; - alu_node *a = static_cast(this); - return (a->bc.op_ptr->flags & AF_MOVA); -} - -bool node::is_pred_set() { - if (!is_alu_inst()) - return false; - alu_node *a = static_cast(this); - return (a->bc.op_ptr->flags & AF_ANY_PRED); -} - -unsigned node::cf_op_flags() { - assert(is_cf_inst()); - cf_node *c = static_cast(this); - return c->bc.op_ptr->flags; -} - -unsigned node::alu_op_flags() { - assert(is_alu_inst()); - alu_node *c = static_cast(this); - return 
c->bc.op_ptr->flags; -} - -unsigned node::fetch_op_flags() { - assert(is_fetch_inst()); - fetch_node *c = static_cast(this); - return c->bc.op_ptr->flags; -} - -unsigned node::alu_op_slot_flags() { - assert(is_alu_inst()); - alu_node *c = static_cast(this); - return c->bc.slot_flags; -} - -region_node* node::get_parent_region() { - node *p = this; - while ((p = p->parent)) - if (p->is_region()) - return static_cast(p); - return NULL; -} - -unsigned container_node::real_alu_count() { - unsigned c = 0; - node *t = first; - while (t) { - if (t->is_alu_inst()) - ++c; - else if (t->is_alu_packed()) - c += static_cast(t)->count(); - t = t->next; - } - return c; -} - -void container_node::collect_stats(node_stats& s) { - - for (node_iterator I = begin(), E = end(); I != E; ++I) { - node *n = *I; - if (n->is_container()) { - static_cast(n)->collect_stats(s); - } - - if (n->is_alu_inst()) { - ++s.alu_count; - alu_node *a = static_cast(n); - if (a->bc.op_ptr->flags & AF_KILL) - ++s.alu_kill_count; - else if (a->is_copy_mov()) - ++s.alu_copy_mov_count; - if (a->uses_ar()) - s.uses_ar = true; - } else if (n->is_fetch_inst()) - ++s.fetch_count; - else if (n->is_cf_inst()) - ++s.cf_count; - else if (n->is_region()) { - ++s.region_count; - region_node *r = static_cast(n); - if(r->is_loop()) - ++s.loop_count; - - if (r->phi) - s.phi_count += r->phi->count(); - if (r->loop_phi) - s.loop_phi_count += r->loop_phi->count(); - } - else if (n->is_depart()) - ++s.depart_count; - else if (n->is_repeat()) - ++s.repeat_count; - else if (n->is_if()) - ++s.if_count; - } -} - -void region_node::expand_depart(depart_node *d) { - depart_vec::iterator I = departs.begin() + d->dep_id, E; - I = departs.erase(I); - E = departs.end(); - while (I != E) { - --(*I)->dep_id; - ++I; - } - d->expand(); -} - -void region_node::expand_repeat(repeat_node *r) { - repeat_vec::iterator I = repeats.begin() + r->rep_id - 1, E; - I = repeats.erase(I); - E = repeats.end(); - while (I != E) { - --(*I)->rep_id; - ++I; 
- } - r->expand(); -} - -void node_stats::dump() { - sblog << " alu_count : " << alu_count << "\n"; - sblog << " alu_kill_count : " << alu_kill_count << "\n"; - sblog << " alu_copy_mov_count : " << alu_copy_mov_count << "\n"; - sblog << " cf_count : " << cf_count << "\n"; - sblog << " fetch_count : " << fetch_count << "\n"; - sblog << " region_count : " << region_count << "\n"; - sblog << " loop_count : " << loop_count << "\n"; - sblog << " phi_count : " << phi_count << "\n"; - sblog << " loop_phi_count : " << loop_phi_count << "\n"; - sblog << " depart_count : " << depart_count << "\n"; - sblog << " repeat_count : " << repeat_count << "\n"; - sblog << " if_count : " << if_count << "\n"; -} - -unsigned alu_node::interp_param() { - if (!(bc.op_ptr->flags & AF_INTERP)) - return 0; - unsigned param; - if (bc.op_ptr->src_count == 2) { - param = src[1]->select.sel(); - } else { - param = src[0]->select.sel(); - } - return param + 1; -} - -alu_group_node* alu_node::get_alu_group_node() { - node *p = parent; - if (p) { - if (p->subtype == NST_ALU_PACKED_INST) { - assert(p->parent && p->parent->subtype == NST_ALU_GROUP); - p = p->parent; - } - return static_cast(p); - } - return NULL; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h deleted file mode 100644 index eecf17d..0000000 --- a/src/gallium/drivers/r600/sb/sb_ir.h +++ /dev/null @@ -1,1189 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice 
(including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#ifndef R600_SB_IR_H_ -#define R600_SB_IR_H_ - -#include -#include -#include -#include -#include - -#include "sb_bc.h" - -namespace r600_sb { - -enum special_regs { - SV_ALU_PRED = 128, - SV_EXEC_MASK, - SV_AR_INDEX, - SV_VALID_MASK, - SV_GEOMETRY_EMIT, - SV_LDS_RW, - SV_LDS_OQA, - SV_LDS_OQB, - SV_SCRATCH -}; - -class node; -class value; -class shader; - -struct sel_chan -{ - unsigned id; - - sel_chan(unsigned id = 0) : id(id) {} - sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {} - - unsigned sel() const { return sel(id); } - unsigned chan() const {return chan(id); } - operator unsigned() const {return id;} - - static unsigned sel(unsigned idx) { return (idx-1) >> 2; } - static unsigned chan(unsigned idx) { return (idx-1) & 3; } - - sel_chan(unsigned bank, unsigned index, - unsigned chan, alu_kcache_index_mode index_mode) - : id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {} - unsigned kcache_index_mode() const { return sel() >> 28; } - unsigned kcache_sel() const { return sel() & 0x0fffffffu; } - unsigned kcache_bank() const { return kcache_sel() >> 12; } -}; - -inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) { - static const char * ch = "xyzw"; - o << r.sel() << "." 
<< ch[r.chan()]; - return o; -} - -typedef std::vector vvec; - -class sb_pool { -protected: - static const unsigned SB_POOL_ALIGN = 8; - static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1 << 16); - - typedef std::vector block_vector; - - unsigned block_size; - block_vector blocks; - unsigned total_size; - -public: - sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE) - : block_size(block_size), blocks(), total_size() {} - - virtual ~sb_pool() { free_all(); } - - void* allocate(unsigned sz); - -protected: - void free_all(); -}; - -template > -class sb_set { - typedef std::vector data_vector; - data_vector vec; -public: - - typedef typename data_vector::iterator iterator; - typedef typename data_vector::const_iterator const_iterator; - - sb_set() : vec() {} - ~sb_set() { } - - iterator begin() { return vec.begin(); } - iterator end() { return vec.end(); } - const_iterator begin() const { return vec.begin(); } - const_iterator end() const { return vec.end(); } - - void add_set(const sb_set& s) { - data_vector t; - t.reserve(vec.size() + s.vec.size()); - std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(), - std::inserter(t, t.begin()), Comp()); - vec.swap(t); - } - - iterator lower_bound(const V& v) { - return std::lower_bound(vec.begin(), vec.end(), v, Comp()); - } - - std::pair insert(const V& v) { - iterator P = lower_bound(v); - if (P != vec.end() && is_equal(*P, v)) - return std::make_pair(P, false); - return std::make_pair(vec.insert(P, v), true); - } - - unsigned erase(const V& v) { - iterator P = lower_bound(v); - if (P == vec.end() || !is_equal(*P, v)) - return 0; - vec.erase(P); - return 1; - } - - void clear() { vec.clear(); } - - bool empty() { return vec.empty(); } - - bool is_equal(const V& v1, const V& v2) { - return !Comp()(v1, v2) && !Comp()(v2, v1); - } - - iterator find(const V& v) { - iterator P = lower_bound(v); - return (P != vec.end() && is_equal(*P, v)) ? 
P : vec.end(); - } - - unsigned size() { return vec.size(); } - void erase(iterator I) { vec.erase(I); } -}; - -template > -class sb_map { - typedef std::pair datatype; - - struct Comp { - bool operator()(const datatype &v1, const datatype &v2) { - return KComp()(v1.first, v2.first); - } - }; - - typedef sb_set dataset; - - dataset set; - -public: - - sb_map() : set() {} - - typedef typename dataset::iterator iterator; - - iterator begin() { return set.begin(); } - iterator end() { return set.end(); } - - void clear() { set.clear(); } - - V& operator[](const K& key) { - datatype P = std::make_pair(key, V()); - iterator F = set.find(P); - if (F == set.end()) { - return (*(set.insert(P).first)).second; - } else { - return (*F).second; - } - } - - std::pair insert(const datatype& d) { - return set.insert(d); - } - - iterator find(const K& key) { - return set.find(std::make_pair(key, V())); - } - - unsigned erase(const K& key) { - return set.erase(std::make_pair(key, V())); - } - - void erase(iterator I) { - set.erase(I); - } -}; - -class sb_bitset { - typedef uint32_t basetype; - static const unsigned bt_bits = sizeof(basetype) << 3; - std::vector data; - unsigned bit_size; - -public: - - sb_bitset() : data(), bit_size() {} - - bool get(unsigned id); - void set(unsigned id, bool bit = true); - bool set_chk(unsigned id, bool bit = true); - - void clear(); - void resize(unsigned size); - - unsigned size() { return bit_size; } - - unsigned find_bit(unsigned start = 0); - - void swap(sb_bitset & bs2); - - bool operator==(const sb_bitset &bs2); - bool operator!=(const sb_bitset &bs2) { return !(*this == bs2); } - - sb_bitset& operator|=(const sb_bitset &bs2) { - if (bit_size < bs2.bit_size) { - resize(bs2.bit_size); - } - - for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c; - ++i) { - data[i] |= bs2.data[i]; - } - return *this; - } - - sb_bitset& operator&=(const sb_bitset &bs2); - sb_bitset& mask(const sb_bitset &bs2); - - friend sb_bitset 
operator|(const sb_bitset &b1, const sb_bitset &b2) { - sb_bitset nbs(b1); - nbs |= b2; - return nbs; - } -}; - -enum value_kind { - VLK_REG, - VLK_REL_REG, - VLK_SPECIAL_REG, - VLK_TEMP, - - VLK_CONST, - VLK_KCACHE, - VLK_PARAM, - VLK_SPECIAL_CONST, - - VLK_UNDEF -}; - - - -class sb_value_pool : protected sb_pool { - unsigned aligned_elt_size; - -public: - sb_value_pool(unsigned elt_size, unsigned block_elts = 256) - : sb_pool(block_elts * (aligned_elt_size = ((elt_size + - SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {} - - virtual ~sb_value_pool() { delete_all(); } - - value* create(value_kind k, sel_chan regid, unsigned ver); - - value* operator[](unsigned id) { - unsigned offset = id * aligned_elt_size; - unsigned block_id; - if (offset < block_size) { - block_id = 0; - } else { - block_id = offset / block_size; - offset = offset % block_size; - } - return (value*)((char*)blocks[block_id] + offset); - } - - unsigned size() { return total_size / aligned_elt_size; } - -protected: - void delete_all(); -}; - - - - - -class sb_value_set { - - sb_bitset bs; - -public: - sb_value_set() : bs() {} - - class iterator { - sb_value_pool &vp; - sb_value_set *s; - unsigned nb; - public: - iterator(shader &sh, sb_value_set *s, unsigned nb = 0); - - - iterator& operator++() { - if (nb + 1 < s->bs.size()) - nb = s->bs.find_bit(nb + 1); - else - nb = s->bs.size(); - return *this; - } - bool operator !=(const iterator &i) { - return s != i.s || nb != i.nb; - } - bool operator ==(const iterator &i) { return !(*this != i); } - value* operator *() { - return vp[nb]; - } - - - }; - - iterator begin(shader &sh) { - return iterator(sh, this, bs.size() ? 
bs.find_bit(0) : 0); - } - iterator end(shader &sh) { return iterator(sh, this, bs.size()); } - - bool add_set_checked(sb_value_set & s2); - - void add_set(sb_value_set & s2) { - if (bs.size() < s2.bs.size()) - bs.resize(s2.bs.size()); - bs |= s2.bs; - } - - void remove_set(sb_value_set & s2); - - bool add_vec(vvec &vv); - - bool add_val(value *v); - bool contains(value *v); - - bool remove_val(value *v); - - bool remove_vec(vvec &vv); - - void clear(); - - bool empty(); -}; - -typedef sb_value_set val_set; - -struct gpr_array { - sel_chan base_gpr; // original gpr - sel_chan gpr; // assigned by regalloc - unsigned array_size; - - gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr), - array_size(array_size) {} - - unsigned hash() { return (base_gpr << 10) * array_size; } - - val_set interferences; - vvec refs; - - bool is_dead(); - -}; - -typedef std::vector regarray_vec; - -enum value_flags { - VLF_UNDEF = (1 << 0), - VLF_READONLY = (1 << 1), - VLF_DEAD = (1 << 2), - - VLF_PIN_REG = (1 << 3), - VLF_PIN_CHAN = (1 << 4), - - // opposite to alu clause local value - goes through alu clause boundary - // (can't use temp gpr, can't recolor in the alu scheduler, etc) - VLF_GLOBAL = (1 << 5), - VLF_FIXED = (1 << 6), - VLF_PVPS = (1 << 7), - - VLF_PREALLOC = (1 << 8) -}; - -inline value_flags operator |(value_flags l, value_flags r) { - return (value_flags)((unsigned)l|(unsigned)r); -} -inline value_flags operator &(value_flags l, value_flags r) { - return (value_flags)((unsigned)l&(unsigned)r); -} -inline value_flags operator ~(value_flags l) { - return (value_flags)(~(unsigned)l); -} -inline value_flags& operator |=(value_flags &l, value_flags r) { - l = l | r; - return l; -} -inline value_flags& operator &=(value_flags &l, value_flags r) { - l = l & r; - return l; -} - -sb_ostream& operator << (sb_ostream &o, value &v); - -typedef uint32_t value_hash; - -typedef std::list< node * > uselist; - -enum constraint_kind { - CK_SAME_REG, - CK_PACKED_BS, - 
CK_PHI -}; - -class shader; -class sb_value_pool; -struct ra_chunk; -class ra_constraint; - -class value { -protected: - value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0) - : kind(k), flags(), - rel(), array(), - version(ver), select(select), pin_gpr(select), gpr(), - gvn_source(), ghash(), - def(), adef(), uses(), constraint(), chunk(), - literal_value(), uid(sh_id) {} - - ~value() { delete_uses(); } - - friend class sb_value_pool; -public: - value_kind kind; - value_flags flags; - - vvec mdef; - vvec muse; - value *rel; - gpr_array *array; - - unsigned version; - - sel_chan select; - sel_chan pin_gpr; - sel_chan gpr; - - value *gvn_source; - value_hash ghash; - - node *def, *adef; - uselist uses; - - ra_constraint *constraint; - ra_chunk *chunk; - - literal literal_value; - - bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; } - - bool is_AR() { - return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0); - } - bool is_geometry_emit() { - return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0); - } - bool is_lds_access() { - return is_special_reg() && select == sel_chan(SV_LDS_RW, 0); - } - bool is_lds_oq() { - return is_special_reg() && (select == sel_chan(SV_LDS_OQA, 0) || select == sel_chan(SV_LDS_OQB, 0)); - } - - node* any_def() { - assert(!(def && adef)); - return def ? 
def : adef; - } - - value* gvalue() { - value *v = this; - while (v->gvn_source && v != v->gvn_source) - // FIXME we really shouldn't have such chains - v = v->gvn_source; - return v; - } - bool is_scratch() { - return is_special_reg() && select == sel_chan(SV_SCRATCH, 0); - } - - bool is_float_0_or_1() { - value *v = gvalue(); - return v->is_const() && (v->literal_value == literal(0) - || v->literal_value == literal(1.0f)); - } - - bool is_undef() { return gvalue()->kind == VLK_UNDEF; } - - bool is_any_gpr() { - return (kind == VLK_REG || kind == VLK_TEMP); - } - - bool is_agpr() { - return array && is_any_gpr(); - } - - // scalar gpr, as opposed to element of gpr array - bool is_sgpr() { - return !array && is_any_gpr(); - } - - bool is_special_reg() { return kind == VLK_SPECIAL_REG; } - bool is_any_reg() { return is_any_gpr() || is_special_reg(); } - bool is_kcache() { return kind == VLK_KCACHE; } - bool is_rel() { return kind == VLK_REL_REG; } - bool is_readonly() { return flags & VLF_READONLY; } - - bool is_chan_pinned() { return flags & VLF_PIN_CHAN; } - bool is_reg_pinned() { return flags & VLF_PIN_REG; } - - bool is_global(); - void set_global(); - void set_prealloc(); - - bool is_prealloc(); - - bool is_fixed(); - void fix(); - - bool is_dead() { return flags & VLF_DEAD; } - - literal & get_const_value() { - value *v = gvalue(); - assert(v->is_const()); - return v->literal_value; - } - - // true if needs to be encoded as literal in alu - bool is_literal() { - return is_const() - && literal_value != literal(0) - && literal_value != literal(1) - && literal_value != literal(-1) - && literal_value != literal(0.5) - && literal_value != literal(1.0); - } - - void add_use(node *n); - void remove_use(const node *n); - - value_hash hash(); - value_hash rel_hash(); - - void assign_source(value *v) { - assert(!gvn_source || gvn_source == this); - gvn_source = v->gvalue(); - } - - bool v_equal(value *v) { return gvalue() == v->gvalue(); } - - unsigned use_count(); - 
void delete_uses(); - - sel_chan get_final_gpr() { - if (array && array->gpr) { - int reg_offset = select.sel() - array->base_gpr.sel(); - if (rel && rel->is_const()) - reg_offset += rel->get_const_value().i; - return array->gpr + (reg_offset << 2); - } else { - return gpr; - } - } - - unsigned get_final_chan() { - if (array) { - assert(array->gpr); - return array->gpr.chan(); - } else { - assert(gpr); - return gpr.chan(); - } - } - - /* Check whether copy-propagation of src into this would create an access - * conflict with relative addressing, i.e. an operation that tries to access - * array elements with different address register values. - */ - bool no_reladdr_conflict_with(value *src); - - val_set interferences; - unsigned uid; -}; - -class expr_handler; - -class value_table { - typedef std::vector vt_item; - typedef std::vector vt_table; - - expr_handler &ex; - - unsigned size_bits; - unsigned size; - unsigned size_mask; - - vt_table hashtable; - - unsigned cnt; - -public: - - value_table(expr_handler &ex, unsigned size_bits = 10) - : ex(ex), size_bits(size_bits), size(1u << size_bits), - size_mask(size - 1), hashtable(size), cnt() {} - - ~value_table() {} - - void add_value(value* v); - - bool expr_equal(value* l, value* r); - - unsigned count() { return cnt; } - - void get_values(vvec & v); -}; - -class sb_context; - -enum node_type { - NT_UNKNOWN, - NT_LIST, - NT_OP, - NT_REGION, - NT_REPEAT, - NT_DEPART, - NT_IF, -}; - -enum node_subtype { - NST_UNKNOWN, - NST_LIST, - NST_ALU_GROUP, - NST_ALU_CLAUSE, - NST_ALU_INST, - NST_ALU_PACKED_INST, - NST_CF_INST, - NST_FETCH_INST, - NST_TEX_CLAUSE, - NST_VTX_CLAUSE, - NST_GDS_CLAUSE, - - NST_BB, - - NST_PHI, - NST_PSI, - NST_COPY, - - NST_LOOP_PHI_CONTAINER, - NST_LOOP_CONTINUE, - NST_LOOP_BREAK -}; - -enum node_flags { - NF_EMPTY = 0, - NF_DEAD = (1 << 0), - NF_REG_CONSTRAINT = (1 << 1), - NF_CHAN_CONSTRAINT = (1 << 2), - NF_ALU_4SLOT = (1 << 3), - NF_CONTAINER = (1 << 4), - - NF_COPY_MOV = (1 << 5), - - 
NF_DONT_KILL = (1 << 6), - NF_DONT_HOIST = (1 << 7), - NF_DONT_MOVE = (1 << 8), - - // for KILLxx - we want to schedule them as early as possible - NF_SCHEDULE_EARLY = (1 << 9), - - // for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU - NF_ALU_STACK_WORKAROUND = (1 << 10), - NF_ALU_2SLOT = (1 << 11), -}; - -inline node_flags operator |(node_flags l, node_flags r) { - return (node_flags)((unsigned)l|(unsigned)r); -} -inline node_flags& operator |=(node_flags &l, node_flags r) { - l = l | r; - return l; -} - -inline node_flags& operator &=(node_flags &l, node_flags r) { - l = (node_flags)((unsigned)l & (unsigned)r); - return l; -} - -inline node_flags operator ~(node_flags r) { - return (node_flags)~(unsigned)r; -} - -struct node_stats { - unsigned alu_count; - unsigned alu_kill_count; - unsigned alu_copy_mov_count; - unsigned cf_count; - unsigned fetch_count; - unsigned region_count; - unsigned loop_count; - unsigned phi_count; - unsigned loop_phi_count; - unsigned depart_count; - unsigned repeat_count; - unsigned if_count; - bool uses_ar; - - node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(), - cf_count(), fetch_count(), region_count(), - loop_count(), phi_count(), loop_phi_count(), depart_count(), - repeat_count(), if_count(), uses_ar(false) {} - - void dump(); -}; - -class shader; - -class vpass; - -class container_node; -class region_node; - -class node { - -protected: - node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY) - : prev(), next(), parent(), - type(nt), subtype(nst), flags(flags), - pred(), dst(), src() {} - - virtual ~node() {}; - -public: - node *prev, *next; - container_node *parent; - - node_type type; - node_subtype subtype; - node_flags flags; - - value *pred; - - vvec dst; - vvec src; - - virtual bool is_valid() { return true; } - virtual bool accept(vpass &p, bool enter); - - void insert_before(node *n); - void insert_after(node *n); - void replace_with(node *n); - void remove(); - - virtual value_hash 
hash() const; - value_hash hash_src() const; - - virtual bool fold_dispatch(expr_handler *ex); - - bool is_container() { return flags & NF_CONTAINER; } - - bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; } - bool is_alu_inst() { return subtype == NST_ALU_INST; } - bool is_alu_group() { return subtype == NST_ALU_GROUP; } - bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; } - - bool is_fetch_clause() { - return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE; - } - - bool is_copy() { return subtype == NST_COPY; } - bool is_copy_mov() { return flags & NF_COPY_MOV; } - bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); } - - bool is_fetch_inst() { return subtype == NST_FETCH_INST; } - bool is_cf_inst() { return subtype == NST_CF_INST; } - - bool is_region() { return type == NT_REGION; } - bool is_depart() { return type == NT_DEPART; } - bool is_repeat() { return type == NT_REPEAT; } - bool is_if() { return type == NT_IF; } - bool is_bb() { return subtype == NST_BB; } - - bool is_phi() { return subtype == NST_PHI; } - - bool is_dead() { return flags & NF_DEAD; } - - bool is_cf_op(unsigned op); - bool is_alu_op(unsigned op); - bool is_fetch_op(unsigned op); - - unsigned cf_op_flags(); - unsigned alu_op_flags(); - unsigned alu_op_slot_flags(); - unsigned fetch_op_flags(); - - bool is_mova(); - bool is_pred_set(); - - bool vec_uses_ar(vvec &vv) { - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (v && v->rel && !v->rel->is_const()) - return true; - } - return false; - } - - bool uses_ar() { - return vec_uses_ar(dst) || vec_uses_ar(src); - } - - bool vec_uses_lds_oq(vvec &vv) { - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (v && v->is_lds_oq()) - return true; - } - return false; - } - - bool consumes_lds_oq() { - return vec_uses_lds_oq(src); - } - - bool produces_lds_oq() { - return vec_uses_lds_oq(dst); - } - - 
region_node* get_parent_region(); - - friend class shader; -}; - -class container_node : public node { -public: - - container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST, - node_flags flags = NF_EMPTY) - : node(nt, nst, flags | NF_CONTAINER), first(), last(), - live_after(), live_before() {} - - // child items list - node *first, *last; - - val_set live_after; - val_set live_before; - - class iterator { - node *p; - public: - iterator(node *pp = NULL) : p(pp) {} - iterator & operator ++() { p = p->next; return *this;} - iterator & operator --() { p = p->prev; return *this;} - node* operator *() { return p; } - node* operator ->() { return p; } - const iterator advance(int n) { - if (!n) return *this; - iterator I(p); - if (n > 0) while (n--) ++I; - else while (n++) --I; - return I; - } - const iterator operator +(int n) { return advance(n); } - const iterator operator -(int n) { return advance(-n); } - bool operator !=(const iterator &i) { return p != i.p; } - bool operator ==(const iterator &i) { return p == i.p; } - }; - - class riterator { - iterator i; - public: - riterator(node *p = NULL) : i(p) {} - riterator & operator ++() { --i; return *this;} - riterator & operator --() { ++i; return *this;} - node* operator *() { return *i; } - node* operator ->() { return *i; } - bool operator !=(const riterator &r) { return i != r.i; } - bool operator ==(const riterator &r) { return i == r.i; } - }; - - iterator begin() { return first; } - iterator end() { return NULL; } - riterator rbegin() { return last; } - riterator rend() { return NULL; } - - bool empty() { assert(first != NULL || first == last); return !first; } - unsigned count(); - - // used with node containers that represent scheduling queues - // ignores copies and takes into account alu_packed_node items - unsigned real_alu_count(); - - void push_back(node *n); - void push_front(node *n); - - void insert_node_before(node *s, node *n); - void insert_node_after(node *s, node *n); - - void 
append_from(container_node *c); - - // remove range [b..e) from some container and assign to this container - void move(iterator b, iterator e); - - void expand(); - void expand(container_node *n); - void remove_node(node *n); - - node *cut(iterator b, iterator e); - - void clear() { first = last = NULL; } - - virtual bool is_valid() { return true; } - virtual bool accept(vpass &p, bool enter); - virtual bool fold_dispatch(expr_handler *ex); - - node* front() { return first; } - node* back() { return last; } - - void collect_stats(node_stats &s); - - friend class shader; - - -}; - -typedef container_node::iterator node_iterator; -typedef container_node::riterator node_riterator; - -class alu_group_node : public container_node { -protected: - alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {} -public: - - std::vector literals; - - virtual bool is_valid() { return subtype == NST_ALU_GROUP; } - virtual bool accept(vpass &p, bool enter); - - - unsigned literal_chan(literal l) { - std::vector::iterator F = - std::find(literals.begin(), literals.end(), l); - assert(F != literals.end()); - return F - literals.begin(); - } - - friend class shader; -}; - -class cf_node : public container_node { -protected: - cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(), - jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); }; -public: - bc_cf bc; - - cf_node *jump_target; - bool jump_after_target; - - virtual bool is_valid() { return subtype == NST_CF_INST; } - virtual bool accept(vpass &p, bool enter); - virtual bool fold_dispatch(expr_handler *ex); - - void jump(cf_node *c) { jump_target = c; jump_after_target = false; } - void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; } - - friend class shader; -}; - -class alu_node : public node { -protected: - alu_node() : node(NT_OP, NST_ALU_INST) { } -public: - bc_alu bc; - - virtual bool is_valid() { return subtype == NST_ALU_INST; } - virtual bool accept(vpass &p, bool enter); - 
virtual bool fold_dispatch(expr_handler *ex); - - unsigned forced_bank_swizzle() { - return ((bc.op_ptr->flags & AF_INTERP) && - ((bc.slot_flags == AF_4V) || - (bc.slot_flags == AF_2V))) ? VEC_210 : 0; - } - - // return param index + 1 if instruction references interpolation param, - // otherwise 0 - unsigned interp_param(); - - alu_group_node *get_alu_group_node(); - - friend class shader; -}; - -// for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later) -class alu_packed_node : public container_node { -protected: - alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {} -public: - - const alu_op_info* op_ptr() { - return static_cast(first)->bc.op_ptr; - } - unsigned op() { return static_cast(first)->bc.op; } - void init_args(bool repl); - - virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; } - virtual bool accept(vpass &p, bool enter); - virtual bool fold_dispatch(expr_handler *ex); - - unsigned get_slot_mask(); - void update_packed_items(sb_context &ctx); - - friend class shader; -}; - -class fetch_node : public node { -protected: - fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); }; -public: - bc_fetch bc; - - virtual bool is_valid() { return subtype == NST_FETCH_INST; } - virtual bool accept(vpass &p, bool enter); - virtual bool fold_dispatch(expr_handler *ex); - - bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; } - - friend class shader; -}; - -class region_node; - -class repeat_node : public container_node { -protected: - repeat_node(region_node *target, unsigned id) - : container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {} -public: - region_node *target; - unsigned rep_id; - - virtual bool accept(vpass &p, bool enter); - - friend class shader; -}; - -class depart_node : public container_node { -protected: - depart_node(region_node *target, unsigned id) - : container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {} -public: - region_node *target; - 
unsigned dep_id; - - virtual bool accept(vpass &p, bool enter); - - friend class shader; -}; - -class if_node : public container_node { -protected: - if_node() : container_node(NT_IF, NST_LIST), cond() {}; -public: - value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx - - virtual bool accept(vpass &p, bool enter); - - friend class shader; -}; - -typedef std::vector depart_vec; -typedef std::vector repeat_vec; - -class region_node : public container_node { -protected: - region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id), - loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop() - {} -public: - unsigned region_id; - - container_node *loop_phi; - container_node *phi; - - val_set vars_defined; - - depart_vec departs; - repeat_vec repeats; - - // true if region was created for loop in the parser, sometimes repeat_node - // may be optimized away so we need to remember this information - bool src_loop; - - virtual bool accept(vpass &p, bool enter); - - unsigned dep_count() { return departs.size(); } - unsigned rep_count() { return repeats.size() + 1; } - - bool is_loop() { return src_loop || !repeats.empty(); } - - container_node* get_entry_code_location() { - node *p = first; - while (p && (p->is_depart() || p->is_repeat())) - p = static_cast(p)->first; - - container_node *c = static_cast(p); - if (c->is_bb()) - return c; - else - return c->parent; - } - - void expand_depart(depart_node *d); - void expand_repeat(repeat_node *r); - - friend class shader; -}; - -class bb_node : public container_node { -protected: - bb_node(unsigned id, unsigned loop_level) - : container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {} -public: - unsigned id; - unsigned loop_level; - - virtual bool accept(vpass &p, bool enter); - - friend class shader; -}; - - -typedef std::vector regions_vec; -typedef std::vector bbs_vec; -typedef std::list sched_queue; -typedef sched_queue::iterator sq_iterator; -typedef std::vector node_vec; 
-typedef std::list node_list; -typedef std::set node_set; - - - -} // namespace r600_sb - -#endif /* R600_SB_IR_H_ */ diff --git a/src/gallium/drivers/r600/sb/sb_liveness.cpp b/src/gallium/drivers/r600/sb/sb_liveness.cpp deleted file mode 100644 index 8ecc9a5..0000000 --- a/src/gallium/drivers/r600/sb/sb_liveness.cpp +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#include "sb_shader.h" -#include "sb_pass.h" - -#define LIV_DEBUG 0 - -#if LIV_DEBUG -#define LIV_DUMP(a) do { a } while (0) -#else -#define LIV_DUMP(a) -#endif - -namespace r600_sb { - -bool liveness::visit(container_node& n, bool enter) { - if (enter) { - n.live_after = live; - process_ins(n); - } else { - process_outs(n); - n.live_before = live; - } - return true; -} - -bool liveness::visit(bb_node& n, bool enter) { - if (enter) { - n.live_after = live; - } else { - n.live_before = live; - } - return true; -} - -bool liveness::visit(alu_group_node& n, bool enter) { - if (enter) { - } else { - } - return true; -} - -bool liveness::visit(cf_node& n, bool enter) { - if (enter) { - if (n.bc.op == CF_OP_CF_END) { - n.flags |= NF_DEAD; - return false; - } - n.live_after = live; - update_interferences(); - process_op(n); - } else { - n.live_before = live; - } - return true; -} - -bool liveness::visit(alu_node& n, bool enter) { - if (enter) { - update_interferences(); - process_op(n); - } else { - } - return false; -} - -bool liveness::visit(alu_packed_node& n, bool enter) { - if (enter) { - update_interferences(); - process_op(n); - - } else { - } - return false; -} - -bool liveness::visit(fetch_node& n, bool enter) { - if (enter) { - update_interferences(); - process_op(n); - } else { - } - return true; -} - -bool liveness::visit(region_node& n, bool enter) { - if (enter) { - val_set s = live; - - update_interferences(); - - if (n.phi) - process_phi_outs(n.phi); - - n.live_after = live; - - live.clear(); - - if (n.loop_phi) { - n.live_before.clear(); - } - - assert(n.count() == 1); - run_on(*static_cast(*n.begin())); - - // second pass for loops - if (n.loop_phi) { - process_phi_outs(n.loop_phi); - n.live_before = live; - - run_on(*static_cast(*n.begin())); - - update_interferences(); // FIXME is it required - - process_phi_outs(n.loop_phi); - process_phi_branch(n.loop_phi, 0); - } - - update_interferences(); // FIXME is it 
required - - n.live_after = s; - n.live_before = live; - } - return false; -} - -bool liveness::visit(repeat_node& n, bool enter) { - if (enter) { - live = n.target->live_before; - process_phi_branch(n.target->loop_phi, n.rep_id); - } else { - } - return true; -} - -bool liveness::visit(depart_node& n, bool enter) { - if (enter) { - live = n.target->live_after; - if(n.target->phi) - process_phi_branch(n.target->phi, n.dep_id); - } else { - } - return true; -} - -bool liveness::visit(if_node& n, bool enter) { - if (enter) { - assert(n.count() == 1); - n.live_after = live; - - run_on(*static_cast(*n.begin())); - - process_op(n); - live.add_set(n.live_after); - } - return false; -} - -void liveness::update_interferences() { - if (!sh.compute_interferences) - return; - - if (!live_changed) - return; - - LIV_DUMP( - sblog << "interf "; - dump::dump_set(sh, live); - sblog << "\n"; - ); - - val_set& s = live; - for(val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) { - value *v = *I; - assert(v); - - if (v->array) { - v->array->interferences.add_set(s); - } - - v->interferences.add_set(s); - v->interferences.remove_val(v); - - LIV_DUMP( - sblog << "interferences updated for "; - dump::dump_val(v); - sblog << " : "; - dump::dump_set(sh, v->interferences); - sblog << "\n"; - ); - } - live_changed = false; -} - -bool liveness::remove_val(value *v) { - if (live.remove_val(v)) { - v->flags &= ~VLF_DEAD; - return true; - } - v->flags |= VLF_DEAD; - return false; -} - -bool liveness::process_maydef(value *v) { - bool r = false; - vvec::iterator S(v->muse.begin()); - - for (vvec::iterator I = v->mdef.begin(), E = v->mdef.end(); I != E; - ++I, ++S) { - value *&d = *I, *&u = *S; - if (!d) { - assert(!u); - continue; - } - - bool alive = remove_val(d); - if (alive) { - r = true; - } else { - d = NULL; - u = NULL; - } - } - return r; -} - -bool liveness::remove_vec(vvec &vv) { - bool r = false; - for (vvec::reverse_iterator I = vv.rbegin(), E = vv.rend(); I != E; ++I) { - 
value* &v = *I; - if (!v) - continue; - - if (v->is_rel()) { - r |= process_maydef(v); - } else - r |= remove_val(v); - } - return r; -} - -bool r600_sb::liveness::visit(node& n, bool enter) { - if (enter) { - update_interferences(); - process_op(n); - } - return false; -} - -bool liveness::process_outs(node& n) { - bool alive = remove_vec(n.dst); - if (alive) - live_changed = true; - return alive; -} - -bool liveness::add_vec(vvec &vv, bool src) { - bool r = false; - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v || v->is_readonly()) - continue; - - if (v->is_rel()) { - r |= add_vec(v->muse, true); - if (v->rel->is_any_reg()) - r |= live.add_val(v->rel); - - } else if (src) { - r |= live.add_val(v); - } - } - - return r; -} - -void liveness::process_ins(node& n) { - if (!(n.flags & NF_DEAD)) { - - live_changed |= add_vec(n.src, true); - live_changed |= add_vec(n.dst, false); - - if (n.type == NT_IF) { - if_node &in = (if_node&)n; - if (in.cond) - live_changed |= live.add_val(in.cond); - } - if (n.pred) - live_changed |= live.add_val(n.pred); - } -} - -void liveness::process_op(node& n) { - - LIV_DUMP( - sblog << "process_op: "; - dump::dump_op(&n); - sblog << "\n"; - sblog << "process_op: live_after:"; - dump::dump_set(sh, live); - sblog << "\n"; - ); - - if(!n.dst.empty() || n.is_cf_op(CF_OP_CALL_FS)) { - if (!process_outs(n)) { - if (!(n.flags & NF_DONT_KILL)) - n.flags |= NF_DEAD; - } else { - n.flags &= ~NF_DEAD; - } - } - process_ins(n); - - LIV_DUMP( - sblog << "process_op: live_before:"; - dump::dump_set(sh, live); - sblog << "\n"; - ); -} - -int liveness::init() { - - if (sh.compute_interferences) { - gpr_array_vec &vv = sh.arrays(); - for (gpr_array_vec::iterator I = vv.begin(), E = vv.end(); I != E; - ++I) { - gpr_array *a = *I; - a->interferences.clear(); - } - } - - return 0; -} - -void liveness::update_src_vec(vvec &vv, bool src) { - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v 
= *I; - - if (!v || !v->is_sgpr()) - continue; - - if (v->rel && v->rel->is_dead()) - v->rel->flags &= ~VLF_DEAD; - - if (src && v->is_dead()) { - v->flags &= ~VLF_DEAD; - } - } -} - -void liveness::process_phi_outs(container_node *phi) { - for (node_iterator I = phi->begin(), E = phi->end(); I != E; ++I) { - node *n = *I; - if (!process_outs(*n)) { - n->flags |= NF_DEAD; - } else { - n->flags &= ~NF_DEAD; - update_src_vec(n->src, true); - update_src_vec(n->dst, false); - } - } -} - -void liveness::process_phi_branch(container_node* phi, unsigned id) { - val_set &s = live; - for (node_iterator I = phi->begin(), E = phi->end(); I != E; ++I) { - node *n = *I; - if (n->is_dead()) - continue; - - value *v = n->src[id]; - - if (!v->is_readonly()) { - live_changed |= s.add_val(v); - v->flags &= ~VLF_DEAD; - } - } -} - -} //namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_pass.cpp b/src/gallium/drivers/r600/sb/sb_pass.cpp deleted file mode 100644 index aecdec8..0000000 --- a/src/gallium/drivers/r600/sb/sb_pass.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -pass::pass(shader &s) : ctx(s.get_ctx()), sh(s) {} - -int pass::run() { return -1; } - -int vpass::init() { return 0; } -int vpass::done() { return 0; } - -int vpass::run() { - int r; - if ((r = init())) - return r; - - run_on(*sh.root); - - if ((r = done())) - return r; - - return 0; -} - -void vpass::run_on(container_node &n) { - if (n.accept(*this, true)) { - - for (node_iterator N, I = n.begin(), E = n.end(); I != E; I = N) { - N = I; - ++N; - - if (I->is_container()) { - container_node *c = static_cast(*I); - run_on(*c); - } else { - I->accept(*this, true); - I->accept(*this, false); - } - } - - } - n.accept(*this, false); -} - -bool vpass::visit(node& n, bool enter) { return true; } -bool vpass::visit(container_node& n, bool enter) { return true; } -bool vpass::visit(alu_group_node& n, bool enter) { return true; } -bool vpass::visit(cf_node& n, bool enter) { return true; } -bool vpass::visit(alu_node& n, bool enter) { return true; } -bool vpass::visit(alu_packed_node& n, bool enter) { return true; } -bool vpass::visit(fetch_node& n, bool enter) { return true; } -bool vpass::visit(region_node& n, bool enter) { return true; } -bool vpass::visit(repeat_node& n, bool enter) { return true; } -bool vpass::visit(depart_node& n, bool enter) { return true; } -bool vpass::visit(if_node& n, bool enter) { return true; } -bool vpass::visit(bb_node& n, bool enter) { return true; } - -void rev_vpass::run_on(container_node& n) { - if (n.accept(*this, true)) { - - for (node_riterator N, I = n.rbegin(), E = n.rend(); I != E; I = N) { - N = I; - ++N; - - if (I->is_container()) { - 
container_node *c = static_cast(*I); - run_on(*c); - } else { - I->accept(*this, true); - I->accept(*this, false); - } - } - - } - n.accept(*this, false); -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h deleted file mode 100644 index 179eab4..0000000 --- a/src/gallium/drivers/r600/sb/sb_pass.h +++ /dev/null @@ -1,735 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#ifndef SB_PASS_H_ -#define SB_PASS_H_ - -#include - -namespace r600_sb { - -class pass { -protected: - sb_context &ctx; - shader &sh; - -public: - pass(shader &s); - - virtual int run(); - - virtual ~pass() {} -}; - -class vpass : public pass { - -public: - - vpass(shader &s) : pass(s) {} - - virtual int init(); - virtual int done(); - - virtual int run(); - virtual void run_on(container_node &n); - - virtual bool visit(node &n, bool enter); - virtual bool visit(container_node &n, bool enter); - virtual bool visit(alu_group_node &n, bool enter); - virtual bool visit(cf_node &n, bool enter); - virtual bool visit(alu_node &n, bool enter); - virtual bool visit(alu_packed_node &n, bool enter); - virtual bool visit(fetch_node &n, bool enter); - virtual bool visit(region_node &n, bool enter); - virtual bool visit(repeat_node &n, bool enter); - virtual bool visit(depart_node &n, bool enter); - virtual bool visit(if_node &n, bool enter); - virtual bool visit(bb_node &n, bool enter); - -}; - -class rev_vpass : public vpass { - -public: - rev_vpass(shader &s) : vpass(s) {} - - virtual void run_on(container_node &n); -}; - - -// =================== PASSES - -class bytecode; - -class bc_dump : public vpass { - using vpass::visit; - - uint32_t *bc_data; - unsigned ndw; - - unsigned id; - - unsigned new_group, group_index; - -public: - - bc_dump(shader &s, bytecode *bc = NULL); - - bc_dump(shader &s, uint32_t *bc_ptr, unsigned ndw) : - vpass(s), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {} - - virtual int init(); - virtual int done(); - - virtual bool visit(cf_node &n, bool enter); - virtual bool visit(alu_node &n, bool enter); - virtual bool visit(fetch_node &n, bool enter); - - void dump_dw(unsigned dw_id, unsigned count = 2); - - void dump(cf_node& n); - void dump(alu_node& n); - void dump(fetch_node& n); -}; - - -class dce_cleanup : public vpass { - using vpass::visit; - - bool remove_unused; - -public: - - 
dce_cleanup(shader &s) : vpass(s), - remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {} - - virtual int run(); - - virtual bool visit(node &n, bool enter); - virtual bool visit(alu_group_node &n, bool enter); - virtual bool visit(cf_node &n, bool enter); - virtual bool visit(alu_node &n, bool enter); - virtual bool visit(alu_packed_node &n, bool enter); - virtual bool visit(fetch_node &n, bool enter); - virtual bool visit(region_node &n, bool enter); - virtual bool visit(container_node &n, bool enter); - -private: - - void cleanup_dst(node &n); - bool cleanup_dst_vec(vvec &vv); - - // Did we alter/remove nodes during a single pass? - bool nodes_changed; -}; - - -class def_use : public pass { - -public: - - def_use(shader &sh) : pass(sh) {} - - virtual int run(); - void run_on(node *n, bool defs); - -private: - - void process_uses(node *n); - void process_defs(node *n, vvec &vv, bool arr_def); - void process_phi(container_node *c, bool defs, bool uses); -}; - - - -class dump : public vpass { - using vpass::visit; - - int level; - -public: - - dump(shader &s) : vpass(s), level(0) {} - - virtual bool visit(node &n, bool enter); - virtual bool visit(container_node &n, bool enter); - virtual bool visit(alu_group_node &n, bool enter); - virtual bool visit(cf_node &n, bool enter); - virtual bool visit(alu_node &n, bool enter); - virtual bool visit(alu_packed_node &n, bool enter); - virtual bool visit(fetch_node &n, bool enter); - virtual bool visit(region_node &n, bool enter); - virtual bool visit(repeat_node &n, bool enter); - virtual bool visit(depart_node &n, bool enter); - virtual bool visit(if_node &n, bool enter); - virtual bool visit(bb_node &n, bool enter); - - - static void dump_op(node &n, const char *name); - static void dump_vec(const vvec & vv); - static void dump_set(shader &sh, val_set & v); - - static void dump_rels(vvec & vv); - - static void dump_val(value *v); - static void dump_op(node *n); - - static void 
dump_op_list(container_node *c); - static void dump_queue(sched_queue &q); - - static void dump_alu(alu_node *n); - -private: - - void indent(); - - void dump_common(node &n); - void dump_flags(node &n); - - void dump_live_values(container_node &n, bool before); -}; - - -// Global Code Motion - -class gcm : public pass { - - sched_queue bu_ready[SQ_NUM]; - sched_queue bu_ready_next[SQ_NUM]; - sched_queue bu_ready_early[SQ_NUM]; - sched_queue ready; - sched_queue ready_above; - - unsigned outstanding_lds_oq; - container_node pending; - - struct op_info { - bb_node* top_bb; - bb_node* bottom_bb; - op_info() : top_bb(), bottom_bb() {} - }; - - typedef std::map op_info_map; - - typedef std::map nuc_map; - - op_info_map op_map; - nuc_map uses; - - typedef std::vector nuc_stack; - - nuc_stack nuc_stk; - unsigned ucs_level; - - bb_node * bu_bb; - - vvec pending_defs; - - node_list pending_nodes; - - unsigned cur_sq; - - // for register pressure tracking in bottom-up pass - val_set live; - int live_count; - - static const int rp_threshold = 100; - - bool pending_exec_mask_update; - -public: - - gcm(shader &sh) : pass(sh), - bu_ready(), bu_ready_next(), bu_ready_early(), - ready(), outstanding_lds_oq(), - op_map(), uses(), nuc_stk(1), ucs_level(), - bu_bb(), pending_defs(), pending_nodes(), cur_sq(), - live(), live_count(), pending_exec_mask_update() {} - - virtual int run(); - -private: - - void collect_instructions(container_node *c, bool early_pass); - - void sched_early(container_node *n); - void td_sched_bb(bb_node *bb); - bool td_is_ready(node *n); - void td_release_uses(vvec &v); - void td_release_val(value *v); - void td_schedule(bb_node *bb, node *n); - - void sched_late(container_node *n); - void bu_sched_bb(bb_node *bb); - void bu_release_defs(vvec &v, bool src); - void bu_release_phi_defs(container_node *p, unsigned op); - bool bu_is_ready(node *n); - void bu_release_val(value *v); - void bu_release_op(node * n); - void bu_find_best_bb(node *n, op_info &oi); - 
void bu_schedule(container_node *bb, node *n); - - void push_uc_stack(); - void pop_uc_stack(); - - void init_def_count(nuc_map &m, container_node &s); - void init_use_count(nuc_map &m, container_node &s); - unsigned get_uc_vec(vvec &vv); - unsigned get_dc_vec(vvec &vv, bool src); - - void add_ready(node *n); - - void dump_uc_stack(); - - unsigned real_alu_count(sched_queue &q, unsigned max); - - // check if we have not less than threshold ready alu instructions - bool check_alu_ready_count(unsigned threshold); -}; - - -class gvn : public vpass { - using vpass::visit; - -public: - - gvn(shader &sh) : vpass(sh) {} - - virtual bool visit(node &n, bool enter); - virtual bool visit(cf_node &n, bool enter); - virtual bool visit(alu_node &n, bool enter); - virtual bool visit(alu_packed_node &n, bool enter); - virtual bool visit(fetch_node &n, bool enter); - virtual bool visit(region_node &n, bool enter); - -private: - - void process_op(node &n, bool rewrite = true); - - // returns true if the value was rewritten - bool process_src(value* &v, bool rewrite); - - - void process_alu_src_constants(node &n, value* &v); -}; - - -class if_conversion : public pass { - -public: - - if_conversion(shader &sh) : pass(sh) {} - - virtual int run(); - - bool run_on(region_node *r); - - void convert_kill_instructions(region_node *r, value *em, bool branch, - container_node *c); - - bool check_and_convert(region_node *r); - - alu_node* convert_phi(value *select, node *phi); - -}; - - -class liveness : public rev_vpass { - using vpass::visit; - - val_set live; - bool live_changed; - -public: - - liveness(shader &s) : rev_vpass(s), live_changed(false) {} - - virtual int init(); - - virtual bool visit(node &n, bool enter); - virtual bool visit(bb_node &n, bool enter); - virtual bool visit(container_node &n, bool enter); - virtual bool visit(alu_group_node &n, bool enter); - virtual bool visit(cf_node &n, bool enter); - virtual bool visit(alu_node &n, bool enter); - virtual bool 
visit(alu_packed_node &n, bool enter); - virtual bool visit(fetch_node &n, bool enter); - virtual bool visit(region_node &n, bool enter); - virtual bool visit(repeat_node &n, bool enter); - virtual bool visit(depart_node &n, bool enter); - virtual bool visit(if_node &n, bool enter); - -private: - - void update_interferences(); - void process_op(node &n); - - bool remove_val(value *v); - bool remove_vec(vvec &v); - bool process_outs(node& n); - void process_ins(node& n); - - void process_phi_outs(container_node *phi); - void process_phi_branch(container_node *phi, unsigned id); - - bool process_maydef(value *v); - - bool add_vec(vvec &vv, bool src); - - void update_src_vec(vvec &vv, bool src); -}; - - -struct bool_op_info { - bool invert; - unsigned int_cvt; - - alu_node *n; -}; - -class peephole : public pass { - -public: - - peephole(shader &sh) : pass(sh) {} - - virtual int run(); - - void run_on(container_node *c); - - void optimize_cc_op(alu_node *a); - - void optimize_cc_op2(alu_node *a); - void optimize_CNDcc_op(alu_node *a); - - bool get_bool_op_info(value *b, bool_op_info& bop); - bool get_bool_flt_to_int_source(alu_node* &a); - void convert_float_setcc(alu_node *f2i, alu_node *s); -}; - - -class psi_ops : public rev_vpass { - using rev_vpass::visit; - -public: - - psi_ops(shader &s) : rev_vpass(s) {} - - virtual bool visit(node &n, bool enter); - virtual bool visit(alu_node &n, bool enter); - - bool try_inline(node &n); - bool try_reduce(node &n); - bool eliminate(node &n); - - void unpredicate(node *n); -}; - - -// check correctness of the generated code, e.g.: -// - expected source operand value is the last value written to its gpr, -// - all arguments of phi node should be allocated to the same gpr, -// TODO other tests -class ra_checker : public pass { - - typedef std::map reg_value_map; - - typedef std::vector regmap_stack; - - regmap_stack rm_stack; - unsigned rm_stk_level; - - value* prev_dst[5]; - -public: - - ra_checker(shader &sh) : pass(sh), 
rm_stk_level(0), prev_dst() {} - - virtual int run(); - - void run_on(container_node *c); - - void dump_error(const error_info &e); - void dump_all_errors(); - -private: - - reg_value_map& rmap() { return rm_stack[rm_stk_level]; } - - void push_stack(); - void pop_stack(); - - // when going out of the alu clause, values in the clause temporary gprs, - // AR, predicate values, PS/PV are destroyed - void kill_alu_only_regs(); - void error(node *n, unsigned id, std::string msg); - - void check_phi_src(container_node *p, unsigned id); - void process_phi_dst(container_node *p); - void check_alu_group(alu_group_node *g); - void process_op_dst(node *n); - void check_op_src(node *n); - void check_src_vec(node *n, unsigned id, vvec &vv, bool src); - void check_value_gpr(node *n, unsigned id, value *v); -}; - -// ======================================= - - -class ra_coalesce : public pass { - -public: - - ra_coalesce(shader &sh) : pass(sh) {} - - virtual int run(); -}; - - - -// ======================================= - -class ra_init : public pass { - -public: - - ra_init(shader &sh) : pass(sh), prev_chans() { - - // The parameter below affects register channels distribution. - // For cayman (VLIW-4) we're trying to distribute the channels - // uniformly, this means significantly better alu slots utilization - // at the expense of higher gpr usage. Hopefully this will improve - // performance, though it has to be proven with real benchmarks yet. - // For VLIW-5 this method could also slightly improve slots - // utilization, but increased register pressure seems more significant - // and overall performance effect is negative according to some - // benchmarks, so it's not used currently. Basically, VLIW-5 doesn't - // really need it because trans slot (unrestricted by register write - // channel) allows to consume most deviations from uniform channel - // distribution. - // Value 3 means that for new allocation we'll use channel that differs - // from 3 last used channels. 
0 for VLIW-5 effectively turns this off. - - ra_tune = sh.get_ctx().is_cayman() ? 3 : 0; - } - - virtual int run(); - -private: - - unsigned prev_chans; - unsigned ra_tune; - - void add_prev_chan(unsigned chan); - unsigned get_preferable_chan_mask(); - - bool ra_node(container_node *c); - bool process_op(node *n); - - bool color(value *v); - - void color_bs_constraint(ra_constraint *c); - - void assign_color(value *v, sel_chan c); - void alloc_arrays(); -}; - -// ======================================= - -class ra_split : public pass { - -public: - - ra_split(shader &sh) : pass(sh) {} - - virtual int run(); - - void split(container_node *n); - void split_op(node *n); - void split_alu_packed(alu_packed_node *n); - void split_vector_inst(node *n); - - void split_packed_ins(alu_packed_node *n); - -#if 0 - void split_pinned_outs(node *n); -#endif - - void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz); - - void split_phi_src(container_node *loc, container_node *c, unsigned id, - bool loop); - void split_phi_dst(node *loc, container_node *c, bool loop); - void init_phi_constraints(container_node *c); -}; - - - -class ssa_prepare : public vpass { - using vpass::visit; - - typedef std::vector vd_stk; - vd_stk stk; - - unsigned level; - -public: - ssa_prepare(shader &s) : vpass(s), level(0) {} - - virtual bool visit(cf_node &n, bool enter); - virtual bool visit(alu_node &n, bool enter); - virtual bool visit(fetch_node &n, bool enter); - virtual bool visit(region_node &n, bool enter); - virtual bool visit(repeat_node &n, bool enter); - virtual bool visit(depart_node &n, bool enter); - -private: - - void push_stk() { - ++level; - if (level + 1 > stk.size()) - stk.resize(level+1); - else - stk[level].clear(); - } - void pop_stk() { - assert(level); - --level; - stk[level].add_set(stk[level + 1]); - } - - void add_defs(node &n); - - val_set & cur_set() { return stk[level]; } - - container_node* create_phi_nodes(int count); -}; - -class ssa_rename : public vpass { - 
using vpass::visit; - - typedef sb_map def_map; - - def_map def_count; - def_map lds_oq_count; - def_map lds_rw_count; - std::stack rename_stack; - std::stack rename_lds_oq_stack; - std::stack rename_lds_rw_stack; - - typedef std::map val_map; - val_map values; - -public: - - ssa_rename(shader &s) : vpass(s) {} - - virtual int init(); - - virtual bool visit(container_node &n, bool enter); - virtual bool visit(node &n, bool enter); - virtual bool visit(alu_group_node &n, bool enter); - virtual bool visit(cf_node &n, bool enter); - virtual bool visit(alu_node &n, bool enter); - virtual bool visit(alu_packed_node &n, bool enter); - virtual bool visit(fetch_node &n, bool enter); - virtual bool visit(region_node &n, bool enter); - virtual bool visit(repeat_node &n, bool enter); - virtual bool visit(depart_node &n, bool enter); - virtual bool visit(if_node &n, bool enter); - -private: - - void push(node *phi); - void pop(); - - unsigned get_index(def_map& m, value* v); - void set_index(def_map& m, value* v, unsigned index); - unsigned new_index(def_map& m, value* v); - - value* rename_use(node *n, value* v); - value* rename_def(node *def, value* v); - - void rename_src_vec(node *n, vvec &vv, bool src); - void rename_dst_vec(node *def, vvec &vv, bool set_def); - - void rename_src(node *n); - void rename_dst(node *n); - - void rename_phi_args(container_node *phi, unsigned op, bool def); - - void rename_virt(node *n); - void rename_virt_val(node *n, value *v); -}; - -class bc_finalizer : public pass { - - cf_node *last_export[EXP_TYPE_COUNT]; - cf_node *last_cf; - - unsigned ngpr; - unsigned nstack; - -public: - - bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(), - nstack() {} - - virtual int run(); - - void finalize_loop(region_node *r); - void finalize_if(region_node *r); - - void run_on(container_node *c); - - void insert_rv6xx_load_ar_workaround(alu_group_node *b4); - void finalize_alu_group(alu_group_node *g, node *prev_node); - bool 
finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node); - - void emit_set_grad(fetch_node* f); - void finalize_fetch(fetch_node *f); - - void finalize_cf(cf_node *c); - - sel_chan translate_kcache(cf_node *alu, value *v); - - void update_ngpr(unsigned gpr); - void update_nstack(region_node *r, unsigned add = 0); - - unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs, - unsigned add = 0); - - void cf_peephole(); - -private: - void copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start); - void emit_set_texture_offsets(fetch_node &f); -}; - - -} // namespace r600_sb - -#endif /* SB_PASS_H_ */ diff --git a/src/gallium/drivers/r600/sb/sb_peephole.cpp b/src/gallium/drivers/r600/sb/sb_peephole.cpp deleted file mode 100644 index 5e33631..0000000 --- a/src/gallium/drivers/r600/sb/sb_peephole.cpp +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#define PPH_DEBUG 0 - -#if PPH_DEBUG -#define PPH_DUMP(q) do { q } while (0) -#else -#define PPH_DUMP(q) -#endif - -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -int peephole::run() { - - run_on(sh.root); - - return 0; -} - -void peephole::run_on(container_node* c) { - - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - node *n = *I; - - if (n->is_container()) - run_on(static_cast(n)); - else { - if (n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS)) { - fetch_node *f = static_cast(n); - bool has_dst = false; - - for(vvec::iterator I = f->dst.begin(), E = f->dst.end(); I != E; ++I) { - value *v = *I; - if (v) - has_dst = true; - } - if (!has_dst) - if (f->bc.op >= FETCH_OP_GDS_ADD_RET && f->bc.op <= FETCH_OP_GDS_USHORT_READ_RET) - f->bc.set_op(f->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD); - } - if (n->is_alu_inst()) { - alu_node *a = static_cast(n); - - if (a->bc.op_ptr->flags & AF_LDS) { - if (!a->dst[0]) { - if (a->bc.op >= LDS_OP2_LDS_ADD_RET && a->bc.op <= LDS_OP3_LDS_MSKOR_RET) - a->bc.set_op(a->bc.op - LDS_OP2_LDS_ADD_RET + LDS_OP2_LDS_ADD); - if (a->bc.op == LDS_OP1_LDS_READ_RET) - a->src[0] = sh.get_undef_value(); - } - } else if (a->bc.op_ptr->flags & - (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) { - optimize_cc_op(a); - } else if (a->bc.op == ALU_OP1_FLT_TO_INT) { - - alu_node *s = a; - if (get_bool_flt_to_int_source(s)) { - convert_float_setcc(a, s); - } - } - } - } - } -} - -void peephole::optimize_cc_op(alu_node* a) { - unsigned aflags = a->bc.op_ptr->flags; - - if (aflags & (AF_PRED | AF_SET | AF_KILL)) { - optimize_cc_op2(a); - } else if (aflags & AF_CMOV) { - optimize_CNDcc_op(a); - } -} 
- -void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) { - alu_node *ns = sh.clone(s); - - ns->dst[0] = f2i->dst[0]; - ns->dst[0]->def = ns; - ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE)); - f2i->insert_after(ns); - f2i->remove(); -} - -void peephole::optimize_cc_op2(alu_node* a) { - - unsigned flags = a->bc.op_ptr->flags; - unsigned cc = flags & AF_CC_MASK; - - if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred) - return; - - unsigned cmp_type = flags & AF_CMP_TYPE_MASK; - unsigned dst_type = flags & AF_DST_TYPE_MASK; - - int op_kind = (flags & AF_PRED) ? 1 : - (flags & AF_SET) ? 2 : - (flags & AF_KILL) ? 3 : 0; - - bool swapped = false; - - if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) { - std::swap(a->src[0],a->src[1]); - swapped = true; - // clear modifiers - a->bc.src[0].clear(); - a->bc.src[1].clear(); - } - - if (swapped || (a->src[1]->is_const() && - a->src[1]->literal_value == literal(0))) { - - value *s = a->src[0]; - - bool_op_info bop = {}; - - PPH_DUMP( - sblog << "cc_op2: "; - dump::dump_op(a); - sblog << "\n"; - ); - - if (!get_bool_op_info(s, bop)) - return; - - if (cc == AF_CC_E) - bop.invert = !bop.invert; - - bool swap_args = false; - - cc = bop.n->bc.op_ptr->flags & AF_CC_MASK; - - if (bop.invert) - cc = invert_setcc_condition(cc, swap_args); - - if (bop.int_cvt) { - assert(cmp_type != AF_FLOAT_CMP); - cmp_type = AF_FLOAT_CMP; - } - - PPH_DUMP( - sblog << "boi node: "; - dump::dump_op(bop.n); - sblog << " invert: " << bop.invert << " int_cvt: " << bop.int_cvt; - sblog <<"\n"; - ); - - unsigned newop; - - switch(op_kind) { - case 1: - newop = get_predsetcc_op(cc, cmp_type); - break; - case 2: - newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST); - break; - case 3: - newop = get_killcc_op(cc, cmp_type); - break; - default: - newop = ALU_OP0_NOP; - assert(!"invalid op kind"); - break; - } - - a->bc.set_op(newop); - - if (swap_args) { - a->src[0] = bop.n->src[1]; - a->src[1] = bop.n->src[0]; 
- a->bc.src[0] = bop.n->bc.src[1]; - a->bc.src[1] = bop.n->bc.src[0]; - - } else { - a->src[0] = bop.n->src[0]; - a->src[1] = bop.n->src[1]; - a->bc.src[0] = bop.n->bc.src[0]; - a->bc.src[1] = bop.n->bc.src[1]; - } - } -} - -void peephole::optimize_CNDcc_op(alu_node* a) { - unsigned flags = a->bc.op_ptr->flags; - unsigned cc = flags & AF_CC_MASK; - unsigned cmp_type = flags & AF_CMP_TYPE_MASK; - bool swap = false; - - if (cc == AF_CC_E) { - swap = !swap; - cc = AF_CC_NE; - } else if (cc != AF_CC_NE) - return; - - value *s = a->src[0]; - - bool_op_info bop = {}; - - PPH_DUMP( - sblog << "cndcc: "; - dump::dump_op(a); - sblog << "\n"; - ); - - if (!get_bool_op_info(s, bop)) - return; - - alu_node *d = bop.n; - - if (d->bc.omod) - return; - - PPH_DUMP( - sblog << "cndcc def: "; - dump::dump_op(d); - sblog << "\n"; - ); - - - unsigned dflags = d->bc.op_ptr->flags; - unsigned dcc = dflags & AF_CC_MASK; - unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK; - unsigned ddst_type = dflags & AF_DST_TYPE_MASK; - int nds; - - // TODO we can handle some of these cases, - // though probably this shouldn't happen - if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST) - return; - - if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0)) - nds = 1; - else if ((d->src[1]->is_const() && - d->src[1]->literal_value == literal(0))) - nds = 0; - else - return; - - // can't propagate ABS modifier to CNDcc because it's OP3 - if (d->bc.src[nds].abs) - return; - - // Don't create an instruction that uses three kcache values - // chances are high that it can't be scheduled - if (d->src[0]->is_kcache() && a->src[1]->is_kcache() && - a->src[2]->is_kcache()) - return; - - // TODO we can handle some cases for uint comparison - if (dcmp_type == AF_UINT_CMP) - return; - - if (dcc == AF_CC_NE) { - dcc = AF_CC_E; - swap = !swap; - } - - if (nds == 1) { - switch (dcc) { - case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break; - case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break; - default: 
break; - } - } - - a->src[0] = d->src[nds]; - a->bc.src[0] = d->bc.src[nds]; - - if (swap) { - std::swap(a->src[1], a->src[2]); - std::swap(a->bc.src[1], a->bc.src[2]); - } - - a->bc.set_op(get_cndcc_op(dcc, dcmp_type)); - -} - -bool peephole::get_bool_flt_to_int_source(alu_node* &a) { - - if (a->bc.op == ALU_OP1_FLT_TO_INT) { - - if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel) - return false; - - value *s = a->src[0]; - if (!s || !s->def || !s->def->is_alu_inst()) - return false; - - alu_node *dn = static_cast(s->def); - - if (dn->is_alu_op(ALU_OP1_TRUNC)) { - s = dn->src[0]; - if (!s || !s->def || !s->def->is_alu_inst()) - return false; - - if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 || - dn->bc.src[0].rel != 0) { - return false; - } - - dn = static_cast(s->def); - - } - - if (dn->bc.op_ptr->flags & AF_SET) { - a = dn; - return true; - } - } - return false; -} - -bool peephole::get_bool_op_info(value* b, bool_op_info& bop) { - - node *d = b->def; - - if (!d || !d->is_alu_inst()) - return false; - - alu_node *dn = static_cast(d); - - if (dn->bc.op_ptr->flags & AF_SET) { - bop.n = dn; - - if (dn->bc.op_ptr->flags & AF_DX10) - bop.int_cvt = true; - - return true; - } - - if (get_bool_flt_to_int_source(dn)) { - bop.n = dn; - bop.int_cvt = true; - return true; - } - - return false; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_psi_ops.cpp b/src/gallium/drivers/r600/sb/sb_psi_ops.cpp deleted file mode 100644 index 7d0a31e..0000000 --- a/src/gallium/drivers/r600/sb/sb_psi_ops.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons 
to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include "sb_shader.h" - -#include "sb_pass.h" - -namespace r600_sb { - -bool r600_sb::psi_ops::visit(alu_node& n, bool enter) { - if (enter) { - } - return false; -} - -bool psi_ops::visit(node& n, bool enter) { - if (enter) { - assert(n.subtype == NST_PSI); - - try_inline(n); - - // TODO eliminate predication until there is full support in all passes - // unpredicate instructions and replace psi-nodes with conditional moves - eliminate(n); - } - return false; -} - -value* get_pred_val(node &n) { - value *pred_val = NULL; - - for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; I += 3) { - value* &pred = *I; - if (pred) { - if (!pred_val) - pred_val = pred; - else { - assert(pred == pred_val); - } - } - } - return pred_val; -} - -// for now we'll never inline psi's with different predicate values, -// so psi node may only contain the refs to one predicate value. 
-bool psi_ops::try_inline(node& n) { - assert(n.subtype == NST_PSI); - - vvec &ns = n.src; - - int sz = ns.size(); - assert(sz && (sz % 3 == 0)); - - value *pred_val = get_pred_val(n); - - int ps_mask = 0; - - bool r = false; - - for (int i = sz - 1; i >= 0; i -= 3) { - - if (ps_mask == 3) { - ns.erase(ns.begin(), ns.begin() + i + 1); - return r; - } - - value* val = ns[i]; - value* predsel = ns[i-1]; - int ps = !predsel ? 3 : predsel == sh.get_pred_sel(0) ? 1 : 2; - - assert(val->def); - - if (val->def->subtype == NST_PSI && ps == 3) { - if (get_pred_val(*val->def) != pred_val) - continue; - - vvec &ds = val->def->src; - - ns.insert(ns.begin() + i + 1, ds.begin(), ds.end()); - ns.erase(ns.begin() + i - 2, ns.begin() + i + 1); - i += ds.size(); - r = true; - - } else { - if ((ps_mask & ps) == ps) { - // this predicate select is subsumed by already handled ops - ns.erase(ns.begin() + i - 2, ns.begin() + i + 1); - } else { - ps_mask |= ps; - } - } - } - return r; -} - -bool psi_ops::try_reduce(node& n) { - assert(n.subtype == NST_PSI); - assert(n.src.size() % 3 == 0); - - // TODO - - return false; -} - -void psi_ops::unpredicate(node *n) { - - if (!n->is_alu_inst()) - return; - - alu_node *a = static_cast(n); - a->pred = NULL; -} - -bool psi_ops::eliminate(node& n) { - assert(n.subtype == NST_PSI); - assert(n.src.size() == 6); - - value *d = n.dst[0]; - - value *s1 = n.src[2]; - value *s2 = n.src[5]; - - value *pred = n.src[3]; - - bool psel = n.src[4] == sh.get_pred_sel(0); - - value *sel = get_select_value_for_em(sh, pred); - - if (s1->is_undef()) { - if (s2->is_undef()) { - - } else { - n.insert_after(sh.create_mov(d, s2)); - } - } else if (s2->is_undef()) { - n.insert_after(sh.create_mov(d, s1)); - } else { - alu_node *a = sh.create_alu(); - a->bc.set_op(ALU_OP3_CNDE_INT); - - a->dst.push_back(d); - a->src.push_back(sel); - - if (psel) { - a->src.push_back(s1); - a->src.push_back(s2); - } else { - a->src.push_back(s2); - a->src.push_back(s1); - } - - 
n.insert_after(a); - } - - n.remove(); - - if (s1->is_any_gpr() && !s1->is_undef() && s1->def) - unpredicate(s1->def); - if (s2->is_any_gpr() && !s2->is_undef() && s2->def) - unpredicate(s2->def); - - return false; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_public.h b/src/gallium/drivers/r600/sb/sb_public.h deleted file mode 100644 index a90771f..0000000 --- a/src/gallium/drivers/r600/sb/sb_public.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#ifndef R600_SB_H_ -#define R600_SB_H_ - - -#ifdef __cplusplus -extern "C" { -#endif - - -struct r600_shader; - -void r600_sb_context_destroy(void *sctx); - -int r600_sb_bytecode_process(struct r600_context *rctx, - struct r600_bytecode *bc, - struct r600_shader *pshader, - int dump_source_bytecode, - int optimize); - - -#ifdef __cplusplus -} // extern "C" -#endif - - -#endif //R600_SB_H_ diff --git a/src/gallium/drivers/r600/sb/sb_ra_checker.cpp b/src/gallium/drivers/r600/sb/sb_ra_checker.cpp deleted file mode 100644 index 6b5c8e4..0000000 --- a/src/gallium/drivers/r600/sb/sb_ra_checker.cpp +++ /dev/null @@ -1,273 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -int ra_checker::run() { - - rm_stack.clear(); - rm_stack.resize(1); - rm_stk_level = 0; - - process_op_dst(sh.root); - - run_on(sh.root); - - assert(rm_stk_level == 0); - - dump_all_errors(); - - assert(sh.errors.empty()); - - return 0; -} - -void ra_checker::dump_error(const error_info &e) { - - sblog << "error at : "; - dump::dump_op(e.n); - - sblog << "\n"; - sblog << " : " << e.message << "\n"; -} - -void ra_checker::dump_all_errors() { - for (error_map::iterator I = sh.errors.begin(), E = sh.errors.end(); - I != E; ++I) { - dump_error(I->second); - } -} - - -void ra_checker::error(node *n, unsigned id, std::string msg) { - error_info e; - e.n = n; - e.arg_index = id; - e.message = msg; - sh.errors.insert(std::make_pair(n, e)); -} - -void ra_checker::push_stack() { - ++rm_stk_level; - if (rm_stack.size() == rm_stk_level) - rm_stack.push_back(rm_stack.back()); - else - rm_stack[rm_stk_level] = rm_stack[rm_stk_level - 1]; -} - -void ra_checker::pop_stack() { - --rm_stk_level; -} - -void ra_checker::kill_alu_only_regs() { - // TODO -} - -void ra_checker::check_value_gpr(node *n, unsigned id, value *v) { - sel_chan gpr = v->gpr; - if (!gpr) { - sb_ostringstream o; - o << "operand value " << *v << " is not allocated"; - error(n, id, o.str()); - return; - } - reg_value_map::iterator F = rmap().find(v->gpr); - if (F == rmap().end()) { - sb_ostringstream o; - o << "operand value " << *v << " was not previously written to its gpr"; - error(n, id, o.str()); - return; - } - if (!F->second->v_equal(v)) { - sb_ostringstream o; - o << "expected operand value " << *v - << ", gpr contains " << *(F->second); - error(n, id, o.str()); - return; - } - - -} - -void ra_checker::check_src_vec(node *n, unsigned id, vvec &vv, bool src) { - - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v || !v->is_sgpr()) - continue; - - if 
(v->is_rel()) { - if (!v->rel) { - sb_ostringstream o; - o << "expected relative offset in " << *v; - error(n, id, o.str()); - return; - } - } else if (src) { - check_value_gpr(n, id, v); - } - } -} - -void ra_checker::check_op_src(node *n) { - check_src_vec(n, 0, n->dst, false); - check_src_vec(n, 100, n->src, true); -} - -void ra_checker::process_op_dst(node *n) { - - unsigned id = 0; - - for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) { - value *v = *I; - - ++id; - - if (!v) - continue; - - if (v->is_sgpr()) { - - if (!v->gpr) { - sb_ostringstream o; - o << "destination operand " << *v << " is not allocated"; - error(n, id, o.str()); - return; - } - - rmap()[v->gpr] = v; - } else if (v->is_rel()) { - if (v->rel->is_const()) { - rmap()[v->get_final_gpr()] = v; - } else { - unsigned sz = v->array->array_size; - unsigned start = v->array->gpr; - for (unsigned i = 0; i < sz; ++i) { - rmap()[start + (i << 2)] = v; - } - } - } - } -} - -void ra_checker::check_phi_src(container_node *p, unsigned id) { - for (node_iterator I = p->begin(), E = p->end(); I != E; ++I) { - node *n = *I; - value *s = n->src[id]; - if (s->is_sgpr()) - check_value_gpr(n, id, s); - } -} - -void ra_checker::process_phi_dst(container_node *p) { - for (node_iterator I = p->begin(), E = p->end(); I != E; ++I) { - node *n = *I; - process_op_dst(n); - } -} - -void ra_checker::check_alu_group(alu_group_node *g) { - - for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { - node *a = *I; - if (!a->is_alu_inst()) { - sb_ostringstream o; - o << "non-alu node inside alu group"; - error(a, 0, o.str()); - return; - } - - check_op_src(a); - } - - std::fill(prev_dst, prev_dst + 5, (value*)NULL); - - for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { - alu_node *a = static_cast(*I); - - process_op_dst(a); - - unsigned slot = a->bc.slot; - prev_dst[slot] = a->dst[0]; - } -} - -void ra_checker::run_on(container_node* c) { - - if (c->is_region()) { - region_node *r = 
static_cast(c); - if (r->loop_phi) { - check_phi_src(r->loop_phi, 0); - process_phi_dst(r->loop_phi); - } - } else if (c->is_depart()) { - - push_stack(); - - } else if (c->is_repeat()) { - - push_stack(); - - } - - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - node *n = *I; - - if(n->is_cf_inst() || n->is_fetch_inst()) { - check_op_src(n); - process_op_dst(n); - } - - if (n->is_container()) { - if (n->is_alu_group()) { - check_alu_group(static_cast(n)); - } else { - container_node *nc = static_cast(n); - run_on(nc); - } - } - } - - if (c->is_depart()) { - depart_node *r = static_cast(c); - check_phi_src(r->target->phi, r->dep_id); - pop_stack(); - } else if (c->is_repeat()) { - ASSERTED repeat_node *r = static_cast(c); - assert (r->target->loop_phi); - - pop_stack(); - } else if (c->is_region()) { - region_node *r = static_cast(c); - if (r->phi) - process_phi_dst(r->phi); - } -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp b/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp deleted file mode 100644 index 4a9462d..0000000 --- a/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp +++ /dev/null @@ -1,626 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#define RA_DEBUG 0 - -#if RA_DEBUG -#define RA_DUMP(q) do { q } while (0) -#else -#define RA_DUMP(q) -#endif - -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -int ra_coalesce::run() { - return sh.coal.run(); -} - -void coalescer::add_edge(value* a, value* b, unsigned cost) { - assert(a->is_sgpr() && b->is_sgpr()); - edges.insert(new ra_edge(a,b, cost)); -} - -void coalescer::create_chunk(value *v) { - - assert(v->is_sgpr()); - - ra_chunk *c = new ra_chunk(); - - c->values.push_back(v); - - if (v->is_chan_pinned()) - c->flags |= RCF_PIN_CHAN; - if (v->is_reg_pinned()) { - c->flags |= RCF_PIN_REG; - } - - c->pin = v->pin_gpr; - - RA_DUMP( - sblog << "create_chunk: "; - dump_chunk(c); - ); - - all_chunks.push_back(c); - v->chunk = c; - -} - -void coalescer::unify_chunks(ra_edge *e) { - ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk; - - RA_DUMP( - sblog << "unify_chunks: "; - dump_chunk(c1); - dump_chunk(c2); - ); - - if (c2->is_chan_pinned() && !c1->is_chan_pinned()) { - c1->flags |= RCF_PIN_CHAN; - c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan()); - } - - if (c2->is_reg_pinned() && !c1->is_reg_pinned()) { - c1->flags |= RCF_PIN_REG; - c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan()); - } - - c1->values.reserve(c1->values.size() + c2->values.size()); - - for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E; - ++I) { - (*I)->chunk = c1; - c1->values.push_back(*I); - } - - chunk_vec::iterator F 
= std::find(all_chunks.begin(), all_chunks.end(), c2); - assert(F != all_chunks.end()); - - all_chunks.erase(F); - - c1->cost += c2->cost + e->cost; - delete c2; -} - -bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) { - unsigned pin_flags = (c1->flags & c2->flags) & - (RCF_PIN_CHAN | RCF_PIN_REG); - - if ((pin_flags & RCF_PIN_CHAN) && - c1->pin.chan() != c2->pin.chan()) - return true; - - if ((pin_flags & RCF_PIN_REG) && - c1->pin.sel() != c2->pin.sel()) - return true; - - for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E; - ++I) { - value *v1 = *I; - - for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E; - ++I) { - value *v2 = *I; - - if (!v1->v_equal(v2) && v1->interferences.contains(v2)) - return true; - } - } - return false; -} - -void coalescer::build_chunks() { - - for (edge_queue::iterator I = edges.begin(), E = edges.end(); - I != E; ++I) { - - ra_edge *e = *I; - - if (!e->a->chunk) - create_chunk(e->a); - - if (!e->b->chunk) - create_chunk(e->b); - - ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk; - - if (c1 == c2) { - c1->cost += e->cost; - } else if (!chunks_interference(c1, c2)) - unify_chunks(e); - } -} - -ra_constraint* coalescer::create_constraint(constraint_kind kind) { - ra_constraint *c = new ra_constraint(kind); - all_constraints.push_back(c); - return c; -} - -void coalescer::dump_edges() { - sblog << "######## affinity edges\n"; - - for (edge_queue::iterator I = edges.begin(), E = edges.end(); - I != E; ++I) { - ra_edge* e = *I; - sblog << " ra_edge "; - dump::dump_val(e->a); - sblog << " <-> "; - dump::dump_val(e->b); - sblog << " cost = " << e->cost << "\n"; - } -} - -void coalescer::dump_chunks() { - sblog << "######## chunks\n"; - - for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end(); - I != E; ++I) { - ra_chunk* c = *I; - dump_chunk(c); - } -} - - -void coalescer::dump_constraint_queue() { - sblog << "######## constraints\n"; - - for (constraint_queue::iterator I 
= constraints.begin(), - E = constraints.end(); I != E; ++I) { - ra_constraint* c = *I; - dump_constraint(c); - } -} - -void coalescer::dump_chunk(ra_chunk* c) { - sblog << " ra_chunk cost = " << c->cost << " : "; - dump::dump_vec(c->values); - - if (c->flags & RCF_PIN_REG) - sblog << " REG = " << c->pin.sel(); - - if (c->flags & RCF_PIN_CHAN) - sblog << " CHAN = " << c->pin.chan(); - - sblog << (c->flags & RCF_GLOBAL ? " GLOBAL" : ""); - - sblog << "\n"; -} - -void coalescer::dump_constraint(ra_constraint* c) { - sblog << " ra_constraint: "; - switch (c->kind) { - case CK_PACKED_BS: sblog << "PACKED_BS"; break; - case CK_PHI: sblog << "PHI"; break; - case CK_SAME_REG: sblog << "SAME_REG"; break; - default: sblog << "UNKNOWN_KIND"; assert(0); break; - } - - sblog << " cost = " << c->cost << " : "; - dump::dump_vec(c->values); - - sblog << "\n"; -} - -void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) { - - for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E; - ++I) { - value *v = *I; - s.add_set(v->interferences); - } - s.remove_vec(c->values); -} - -void coalescer::build_chunk_queue() { - for (chunk_vec::iterator I = all_chunks.begin(), - E = all_chunks.end(); I != E; ++I) { - ra_chunk *c = *I; - - if (!c->is_fixed()) - chunks.insert(c); - } -} - -void coalescer::build_constraint_queue() { - for (constraint_vec::iterator I = all_constraints.begin(), - E = all_constraints.end(); I != E; ++I) { - ra_constraint *c = *I; - unsigned cost = 0; - - if (c->values.empty() || !c->values.front()->is_sgpr()) - continue; - - if (c->kind != CK_SAME_REG) - continue; - - for (vvec::iterator I = c->values.begin(), E = c->values.end(); - I != E; ++I) { - value *v = *I; - if (!v->chunk) - create_chunk(v); - else - cost += v->chunk->cost; - } - c->cost = cost; - constraints.insert(c); - } -} - -int coalescer::color_chunks() { - - for (chunk_queue::iterator I = chunks.begin(), E = chunks.end(); - I != E; ++I) { - ra_chunk *c = *I; - if (c->is_fixed() 
|| c->values.size() == 1) - continue; - - sb_bitset rb; - val_set interf; - - get_chunk_interferences(c, interf); - - RA_DUMP( - sblog << "color_chunks: "; - dump_chunk(c); - sblog << "\n interferences: "; - dump::dump_set(sh,interf); - sblog << "\n"; - ); - - init_reg_bitset(rb, interf); - - unsigned pass = c->is_reg_pinned() ? 0 : 1; - - unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0; - unsigned ce = c->is_chan_pinned() ? cs + 1 : 4; - - unsigned color = 0; - - while (pass < 2) { - - unsigned rs, re; - - if (pass == 0) { - rs = c->pin.sel(); - re = rs + 1; - } else { - rs = 0; - re = sh.num_nontemp_gpr(); - } - - for (unsigned reg = rs; reg < re; ++reg) { - for (unsigned chan = cs; chan < ce; ++chan) { - unsigned bit = sel_chan(reg, chan); - if (bit >= rb.size() || !rb.get(bit)) { - color = bit; - break; - } - } - if (color) - break; - } - - if (color) - break; - - ++pass; - } - - if (!color) { - fprintf(stderr, "r600/SB: unable to color registers\n"); - return -1; - } - color_chunk(c, color); - } - return 0; -} - -void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) { - - for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) { - value *v = *I; - - if (!v->is_any_gpr()) - continue; - - unsigned gpr = v->get_final_gpr(); - if (!gpr) - continue; - - if (gpr) { - if (gpr >= bs.size()) - bs.resize(gpr + 64); - bs.set(gpr, 1); - } - } -} - -void coalescer::color_chunk(ra_chunk *c, sel_chan color) { - - vvec vv = c->values; - - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; - ++I) { - value *v = *I; - - if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) { - detach_value(v); - continue; - } - - if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) { - detach_value(v); - continue; - } - - v->gpr = color; - - if (v->constraint && v->constraint->kind == CK_PHI) - v->fix(); - - - RA_DUMP( - sblog << " assigned " << color << " to "; - dump::dump_val(v); - sblog << "\n"; - ); - } - - c->pin = color; - - if 
(c->is_reg_pinned()) { - c->fix(); - } -} - -coalescer::~coalescer() { - - // FIXME use pool allocator ?? - - for (constraint_vec::iterator I = all_constraints.begin(), - E = all_constraints.end(); I != E; ++I) { - delete (*I); - } - - for (chunk_vec::iterator I = all_chunks.begin(), - E = all_chunks.end(); I != E; ++I) { - delete (*I); - } - - for (edge_queue::iterator I = edges.begin(), E = edges.end(); - I != E; ++I) { - delete (*I); - } -} - -int coalescer::run() { - int r; - - RA_DUMP( dump_edges(); ); - - build_chunks(); - RA_DUMP( dump_chunks(); ); - - build_constraint_queue(); - RA_DUMP( dump_constraint_queue(); ); - - if ((r = color_constraints())) - return r; - - build_chunk_queue(); - return color_chunks(); -} - -void coalescer::color_phi_constraint(ra_constraint* c) { -} - -ra_chunk* coalescer::detach_value(value *v) { - - vvec::iterator F = std::find(v->chunk->values.begin(), - v->chunk->values.end(), v); - - assert(F != v->chunk->values.end()); - v->chunk->values.erase(F); - create_chunk(v); - - if (v->is_reg_pinned()) { - v->chunk->fix(); - } - - RA_DUMP( - sblog << " detached : "; - dump_chunk(v->chunk); - ); - - return v->chunk; - -} - -int coalescer::color_reg_constraint(ra_constraint *c) { - unsigned k, cnt = c->values.size(); - vvec & cv = c->values; - - ra_chunk *ch[4]; - unsigned swz[4] = {0, 1, 2, 3}; - val_set interf[4]; - sb_bitset rb[4]; - - bool reg_pinned = false; - unsigned pin_reg = ~0; - - unsigned chan_mask = 0; - - k = 0; - for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) { - value *v = *I; - - if (!v->chunk) - create_chunk(v); - - ch[k] = v->chunk; - - if (v->chunk->is_chan_pinned()) { - unsigned chan = 1 << v->chunk->pin.chan(); - - if (chan & chan_mask) { // channel already in use - ch[k] = detach_value(v); - assert(!ch[k]->is_chan_pinned()); - } else { - chan_mask |= chan; - } - } - - if (v->chunk->is_reg_pinned()) { - if (!reg_pinned) { - reg_pinned = true; - pin_reg = v->chunk->pin.sel(); - } - } - - 
get_chunk_interferences(ch[k], interf[k]); - init_reg_bitset(rb[k], interf[k]); - } - - unsigned start_reg, end_reg; - - start_reg = 0; - end_reg = sh.num_nontemp_gpr(); - - unsigned min_reg = end_reg; - unsigned min_swz[4]; - unsigned i, pass = reg_pinned ? 0 : 1; - - bool done = false; - - while (pass < 2) { - - unsigned rs, re; - - if (pass == 0) { - re = pin_reg + 1; - rs = pin_reg; - } else { - re = end_reg; - rs = start_reg; - } - - min_reg = re; - - // cycle on swizzle combinations - do { - for (i = 0; i < cnt; ++i) { - if (ch[i]->flags & RCF_PIN_CHAN) - if (ch[i]->pin.chan() != swz[i]) - break; - } - if (i != cnt) - continue; - - // looking for minimal reg number such that the constrained chunks - // may be colored with the current swizzle combination - for (unsigned reg = rs; reg < min_reg; ++reg) { - for (i = 0; i < cnt; ++i) { - unsigned bit = sel_chan(reg, swz[i]); - if (bit < rb[i].size() && rb[i].get(bit)) - break; - } - if (i == cnt) { - done = true; - min_reg = reg; - std::copy(swz, swz + 4, min_swz); - break; - } - } - - if (pass == 0 && done) - break; - - } while (std::next_permutation(swz, swz + 4)); - - if (!done && pass) { - sblog << "sb: ra_coalesce - out of registers\n"; - return -1; - } - - if (pass == 0 && done) - break; - - ++pass; - }; - - assert(done); - - RA_DUMP( - sblog << "min reg = " << min_reg << " min_swz = " - << min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n"; - ); - - for (i = 0; i < cnt; ++i) { - sel_chan color(min_reg, min_swz[i]); - ra_chunk *cc = ch[i]; - - if (cc->is_fixed()) { - if (cc->pin != color) - cc = detach_value(cv[i]); - else - continue; - } - - color_chunk(cc, color); - cc->fix(); - cc->set_prealloc(); - } - - return 0; -} - -int coalescer::color_constraints() { - int r; - - for (constraint_queue::iterator I = constraints.begin(), - E = constraints.end(); I != E; ++I) { - - ra_constraint *c = *I; - - RA_DUMP( - sblog << "color_constraints: "; - dump_constraint(c); - ); - - if (c->kind == 
CK_SAME_REG) { - if ((r = color_reg_constraint(c))) - return r; - } else if (c->kind == CK_PHI) - color_phi_constraint(c); - } - return 0; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/src/gallium/drivers/r600/sb/sb_ra_init.cpp deleted file mode 100644 index e14b187..0000000 --- a/src/gallium/drivers/r600/sb/sb_ra_init.cpp +++ /dev/null @@ -1,856 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Vadim Girlin - */ - -#define RA_DEBUG 0 - -#if RA_DEBUG -#define RA_DUMP(q) do { q } while (0) -#else -#define RA_DUMP(q) -#endif - -#include - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -class regbits { - typedef uint32_t basetype; - static const unsigned bt_bytes = sizeof(basetype); - static const unsigned bt_index_shift = 5; - static const unsigned bt_index_mask = (1u << bt_index_shift) - 1; - static const unsigned bt_bits = bt_bytes << 3; - static const unsigned size = MAX_GPR * 4 / bt_bits; - - basetype dta[size]; - - unsigned num_temps; - -public: - - regbits(unsigned num_temps) : dta(), num_temps(num_temps) {} - regbits(unsigned num_temps, unsigned value) : num_temps(num_temps) - { set_all(value); } - - regbits(shader &sh, val_set &vs) : num_temps(sh.get_ctx().alu_temp_gprs) - { set_all(1); from_val_set(sh, vs); } - - void set_all(unsigned val); - void from_val_set(shader &sh, val_set &vs); - - void set(unsigned index); - void clear(unsigned index); - bool get(unsigned index); - - void set(unsigned index, unsigned val); - - sel_chan find_free_bit(); - sel_chan find_free_chans(unsigned mask); - sel_chan find_free_chan_by_mask(unsigned mask); - sel_chan find_free_array(unsigned size, unsigned mask); - - void dump(); -}; - -// ======================================= - -void regbits::dump() { - for (unsigned i = 0; i < size * bt_bits; ++i) { - - if (!(i & 31)) - sblog << "\n"; - - if (!(i & 3)) { - sblog.print_w(i / 4, 7); - sblog << " "; - } - - sblog << (get(i) ? 1 : 0); - } -} - - -void regbits::set_all(unsigned v) { - memset(&dta, v ? 
0xFF : 0x00, size * bt_bytes); -} - -void regbits::from_val_set(shader &sh, val_set& vs) { - val_set &s = vs; - unsigned g; - for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) { - value *v = *I; - if (v->is_any_gpr()) { - g = v->get_final_gpr(); - if (!g) - continue; - } else - continue; - - assert(g); - --g; - assert(g < 512); - clear(g); - } -} - -void regbits::set(unsigned index) { - unsigned ih = index >> bt_index_shift; - unsigned il = index & bt_index_mask; - dta[ih] |= ((basetype)1u << il); -} - -void regbits::clear(unsigned index) { - unsigned ih = index >> bt_index_shift; - unsigned il = index & bt_index_mask; - assert(ih < size); - dta[ih] &= ~((basetype)1u << il); -} - -bool regbits::get(unsigned index) { - unsigned ih = index >> bt_index_shift; - unsigned il = index & bt_index_mask; - return dta[ih] & ((basetype)1u << il); -} - -void regbits::set(unsigned index, unsigned val) { - unsigned ih = index >> bt_index_shift; - unsigned il = index & bt_index_mask; - basetype bm = 1u << il; - dta[ih] = (dta[ih] & ~bm) | (val << il); -} - -// free register for ra means the bit is set -sel_chan regbits::find_free_bit() { - unsigned elt = 0; - unsigned bit = 0; - - while (elt < size && !dta[elt]) - ++elt; - - if (elt >= size) - return 0; - - bit = __builtin_ctz(dta[elt]) + (elt << bt_index_shift); - - assert(bit < ((MAX_GPR - num_temps) << 2)); - - return bit + 1; -} - -// find free gpr component to use as indirectly addressable array -sel_chan regbits::find_free_array(unsigned length, unsigned mask) { - unsigned cc[4] = {}; - - // FIXME optimize this. 
though hopefully we won't have a lot of arrays - for (unsigned a = 0; a < MAX_GPR - num_temps; ++a) { - for(unsigned c = 0; c < MAX_CHAN; ++c) { - if (mask & (1 << c)) { - if (get((a << 2) | c)) { - if (++cc[c] == length) - return sel_chan(a - length + 1, c); - } else { - cc[c] = 0; - } - } - } - } - return 0; -} - -sel_chan regbits::find_free_chans(unsigned mask) { - unsigned elt = 0; - unsigned bit = 0; - - assert (!(mask & ~0xF)); - basetype cd = dta[elt]; - - do { - if (!cd) { - if (++elt < size) { - cd = dta[elt]; - bit = 0; - continue; - } else - return 0; - } - - unsigned p = __builtin_ctz(cd) & ~(basetype)3u; - - assert (p <= bt_bits - bit); - bit += p; - cd >>= p; - - if ((cd & mask) == mask) { - return ((elt << bt_index_shift) | bit) + 1; - } - - bit += 4; - cd >>= 4; - - } while (1); - - return 0; -} - -sel_chan regbits::find_free_chan_by_mask(unsigned mask) { - unsigned elt = 0; - unsigned bit = 0; - - assert (!(mask & ~0xF)); - basetype cd = dta[elt]; - - do { - if (!cd) { - if (++elt < size) { - cd = dta[elt]; - bit = 0; - continue; - } else - return 0; - } - - unsigned p = __builtin_ctz(cd) & ~(basetype)3u; - - assert (p <= bt_bits - bit); - bit += p; - cd >>= p; - - if (cd & mask) { - unsigned nb = __builtin_ctz(cd & mask); - unsigned ofs = ((elt << bt_index_shift) | bit); - return nb + ofs + 1; - } - - bit += 4; - cd >>= 4; - - } while (1); - - return 0; -} - -// ================================ - -void ra_init::alloc_arrays() { - - gpr_array_vec &ga = sh.arrays(); - - for(gpr_array_vec::iterator I = ga.begin(), E = ga.end(); I != E; ++I) { - gpr_array *a = *I; - - RA_DUMP( - sblog << "array [" << a->array_size << "] at " << a->base_gpr << "\n"; - sblog << "\n"; - ); - - // skip preallocated arrays (e.g. 
with preloaded inputs) - if (a->gpr) { - RA_DUMP( sblog << " FIXED at " << a->gpr << "\n"; ); - continue; - } - - bool dead = a->is_dead(); - - if (dead) { - RA_DUMP( sblog << " DEAD\n"; ); - continue; - } - - val_set &s = a->interferences; - - - for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) { - value *v = *I; - if (v->array == a) - s.remove_val(v); - } - - RA_DUMP( - sblog << " interf: "; - dump::dump_set(sh, s); - sblog << "\n"; - ); - - regbits rb(sh, s); - - sel_chan base = rb.find_free_array(a->array_size, - (1 << a->base_gpr.chan())); - - RA_DUMP( sblog << " found base: " << base << "\n"; ); - - a->gpr = base; - } -} - - -int ra_init::run() { - - alloc_arrays(); - - return ra_node(sh.root) ? 0 : 1; -} - -bool ra_init::ra_node(container_node* c) { - - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - node *n = *I; - if (n->type == NT_OP) { - if (!process_op(n)) - return false; - } - if (n->is_container() && !n->is_alu_packed()) { - if (!ra_node(static_cast(n))) - return false; - } - } - return true; -} - -bool ra_init::process_op(node* n) { - - bool copy = n->is_copy_mov(); - - RA_DUMP( - sblog << "ra_init: process_op : "; - dump::dump_op(n); - sblog << "\n"; - ); - - if (n->is_alu_packed()) { - for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) { - value *v = *I; - if (v && v->is_sgpr() && v->constraint && - v->constraint->kind == CK_PACKED_BS) { - color_bs_constraint(v->constraint); - break; - } - } - } - - if (n->is_fetch_inst() || n->is_cf_inst()) { - for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) { - value *v = *I; - if (v && v->is_sgpr()) - if (!color(v)) - return false; - } - } - - for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) { - value *v = *I; - if (!v) - continue; - if (v->is_sgpr()) { - if (!v->gpr) { - if (copy && !v->constraint) { - value *s = *(n->src.begin() + (I - n->dst.begin())); - assert(s); - if (s->is_sgpr()) { - assign_color(v, 
s->gpr); - } - } else - if (!color(v)) - return false; - } - } - } - return true; -} - -void ra_init::color_bs_constraint(ra_constraint* c) { - vvec &vv = c->values; - assert(vv.size() <= 8); - - RA_DUMP( - sblog << "color_bs_constraint: "; - dump::dump_vec(vv); - sblog << "\n"; - ); - - regbits rb(ctx.alu_temp_gprs); - - unsigned chan_count[4] = {}; - unsigned allowed_chans = 0x0F; - - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - - if (!v || v->is_dead()) - continue; - - sel_chan gpr = v->get_final_gpr(); - - val_set interf; - - if (v->chunk) - sh.coal.get_chunk_interferences(v->chunk, interf); - else - interf = v->interferences; - - RA_DUMP( - sblog << " processing " << *v << " interferences : "; - dump::dump_set(sh, interf); - sblog << "\n"; - ); - - if (gpr) { - unsigned chan = gpr.chan(); - if (chan_count[chan] < 3) { - ++chan_count[chan]; - continue; - } else { - v->flags &= ~VLF_FIXED; - allowed_chans &= ~(1 << chan); - assert(allowed_chans); - } - } - - v->gpr = 0; - - gpr = 1; - rb.set_all(1); - - - rb.from_val_set(sh, interf); - - RA_DUMP( - sblog << " regbits : "; - rb.dump(); - sblog << "\n"; - ); - - while (allowed_chans && gpr.sel() < sh.num_nontemp_gpr()) { - - while (rb.get(gpr - 1) == 0) - gpr = gpr + 1; - - RA_DUMP( - sblog << " trying " << gpr << "\n"; - ); - - unsigned chan = gpr.chan(); - if (chan_count[chan] < 3) { - ++chan_count[chan]; - - if (v->chunk) { - vvec::iterator F = std::find(v->chunk->values.begin(), - v->chunk->values.end(), - v); - v->chunk->values.erase(F); - v->chunk = NULL; - } - - assign_color(v, gpr); - break; - } else { - allowed_chans &= ~(1 << chan); - } - gpr = gpr + 1; - } - - if (!gpr) { - sblog << "color_bs_constraint: failed...\n"; - assert(!"coloring failed"); - } - } -} - -bool ra_init::color(value* v) { - - if (v->constraint && v->constraint->kind == CK_PACKED_BS) { - color_bs_constraint(v->constraint); - return true; - } - - if (v->chunk && v->chunk->is_fixed()) - return 
true; - - RA_DUMP( - sblog << "coloring "; - dump::dump_val(v); - sblog << " interferences "; - dump::dump_set(sh, v->interferences); - sblog << "\n"; - ); - - if (v->is_reg_pinned()) { - assert(v->is_chan_pinned()); - assign_color(v, v->pin_gpr); - return true; - } - - regbits rb(sh, v->interferences); - sel_chan c; - - if (v->is_chan_pinned()) { - unsigned mask = 1 << v->pin_gpr.chan(); - c = rb.find_free_chans(mask) + v->pin_gpr.chan(); - } else { - unsigned cm = get_preferable_chan_mask(); - c = rb.find_free_chan_by_mask(cm); - } - - if (!c || c.sel() >= 128 - ctx.alu_temp_gprs) - return false; - assign_color(v, c); - return true; -} - -void ra_init::assign_color(value* v, sel_chan c) { - add_prev_chan(c.chan()); - v->gpr = c; - RA_DUMP( - sblog << "colored "; - dump::dump_val(v); - sblog << " to " << c << "\n"; - ); -} - -// =================================================== - -int ra_split::run() { - split(sh.root); - return 0; -} - -void ra_split::split_phi_src(container_node *loc, container_node *c, - unsigned id, bool loop) { - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - node *p = *I; - value* &v = p->src[id], *d = p->dst[0]; - assert(v); - - if (!d->is_sgpr() || v->is_undef()) - continue; - - value *t = sh.create_temp_value(); - alu_node* n = sh.create_copy_mov(t, v); - if (loop) - n->flags |= NF_DONT_MOVE; - if (loop && id == 0) - loc->insert_before(n); - else - loc->push_back(n); - v = t; - - sh.coal.add_edge(v, d, coalescer::phi_cost); - } -} - -void ra_split::split_phi_dst(node* loc, container_node *c, bool loop) { - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - node *p = *I; - value* &v = p->dst[0]; - assert(v); - - if (!v->is_sgpr()) - continue; - - value *t = sh.create_temp_value(); - node *cp = sh.create_copy_mov(v, t); - if (loop) { - cp->flags |= NF_DONT_MOVE; - static_cast(loc)->push_front(cp); - } else - loc->insert_after(cp); - v = t; - } -} - - -void ra_split::init_phi_constraints(container_node *c) 
{ - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - node *p = *I; - ra_constraint *cc = sh.coal.create_constraint(CK_PHI); - cc->values.push_back(p->dst[0]); - - for (vvec::iterator I = p->src.begin(), E = p->src.end(); I != E; ++I) { - value *v = *I; - if (v->is_sgpr()) - cc->values.push_back(v); - } - - cc->update_values(); - } -} - -void ra_split::split(container_node* n) { - - if (n->type == NT_DEPART) { - depart_node *d = static_cast(n); - if (d->target->phi) - split_phi_src(d, d->target->phi, d->dep_id, false); - } else if (n->type == NT_REPEAT) { - repeat_node *r = static_cast(n); - if (r->target->loop_phi) - split_phi_src(r, r->target->loop_phi, r->rep_id, true); - } else if (n->type == NT_REGION) { - region_node *r = static_cast(n); - if (r->phi) { - split_phi_dst(r, r->phi, false); - } - if (r->loop_phi) { - split_phi_dst(r->get_entry_code_location(), r->loop_phi, - true); - split_phi_src(r, r->loop_phi, 0, true); - } - } - - for (node_riterator N, I = n->rbegin(), E = n->rend(); I != E; I = N) { - N = I; - ++N; - node *o = *I; - if (o->type == NT_OP) { - split_op(o); - } else if (o->is_container()) { - split(static_cast(o)); - } - } - - if (n->type == NT_REGION) { - region_node *r = static_cast(n); - if (r->phi) - init_phi_constraints(r->phi); - if (r->loop_phi) - init_phi_constraints(r->loop_phi); - } -} - -void ra_split::split_op(node* n) { - switch(n->subtype) { - case NST_ALU_PACKED_INST: - split_alu_packed(static_cast(n)); - break; - case NST_FETCH_INST: - case NST_CF_INST: - split_vector_inst(n); - default: - break; - } -} - -void ra_split::split_packed_ins(alu_packed_node *n) { - vvec vv = n->src; - vvec sv, dv; - - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - - value *&v = *I; - - if (v && v->is_any_gpr() && !v->is_undef()) { - - vvec::iterator F = std::find(sv.begin(), sv.end(), v); - value *t; - - if (F != sv.end()) { - t = *(dv.begin() + (F - sv.begin())); - } else { - t = sh.create_temp_value(); - 
sv.push_back(v); - dv.push_back(t); - } - v = t; - } - } - - unsigned cnt = sv.size(); - - if (cnt > 0) { - n->src = vv; - for (vvec::iterator SI = sv.begin(), DI = dv.begin(), SE = sv.end(); - SI != SE; ++SI, ++DI) { - n->insert_before(sh.create_copy_mov(*DI, *SI)); - } - - ra_constraint *c = sh.coal.create_constraint(CK_PACKED_BS); - c->values = dv; - c->update_values(); - } -} - -// TODO handle other packed ops for cayman -void ra_split::split_alu_packed(alu_packed_node* n) { - switch (n->op()) { - case ALU_OP2_DOT4: - case ALU_OP2_DOT4_IEEE: - case ALU_OP2_CUBE: - split_packed_ins(n); - break; - default: - break; - } -} - -void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) { - unsigned ch = 0; - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I, ++ch) { - - value* &o = *I; - - if (o) { - - assert(!o->is_dead()); - - if (o->is_undef() || o->is_geometry_emit() || o->is_scratch()) - continue; - - if (allow_swz && o->is_float_0_or_1()) - continue; - - value *t; - vvec::iterator F = - allow_swz ? 
std::find(v2.begin(), v2.end(), o) : v2.end(); - - if (F != v2.end()) { - t = *(v1.begin() + (F - v2.begin())); - } else { - t = sh.create_temp_value(); - - if (!allow_swz) { - t->flags |= VLF_PIN_CHAN; - t->pin_gpr = sel_chan(0, ch); - } - - v2.push_back(o); - v1.push_back(t); - } - o = t; - } - } -} - -void ra_split::split_vector_inst(node* n) { - ra_constraint *c; - - bool call_fs = n->is_cf_op(CF_OP_CALL_FS); - bool no_src_swizzle = n->is_cf_inst() && (n->cf_op_flags() & CF_MEM); - - no_src_swizzle |= n->is_fetch_op(FETCH_OP_VFETCH) || - n->is_fetch_op(FETCH_OP_SEMFETCH); - - no_src_swizzle |= n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS); - - if (!n->src.empty() && !call_fs) { - - // we may have more than one source vector - - // fetch instructions with FF_USEGRAD have gradient values in - // src vectors 1 (src[4-7] and 2 (src[8-11]) - - unsigned nvec = n->src.size() >> 2; - assert(nvec << 2 <= n->src.size()); - - for (unsigned nv = 0; nv < nvec; ++nv) { - vvec sv, tv, nsrc(4); - unsigned arg_start = nv << 2; - - std::copy(n->src.begin() + arg_start, - n->src.begin() + arg_start + 4, - nsrc.begin()); - - split_vec(nsrc, tv, sv, !no_src_swizzle); - - unsigned cnt = sv.size(); - - if (no_src_swizzle || cnt) { - - std::copy(nsrc.begin(), nsrc.end(), n->src.begin() + arg_start); - - for(unsigned i = 0, s = tv.size(); i < s; ++i) { - n->insert_before(sh.create_copy_mov(tv[i], sv[i])); - } - - c = sh.coal.create_constraint(CK_SAME_REG); - c->values = tv; - c->update_values(); - } - } - } - - if (!n->dst.empty()) { - vvec sv, tv, ndst = n->dst; - - split_vec(ndst, tv, sv, true); - - if (sv.size()) { - n->dst = ndst; - - node *lp = n; - for(unsigned i = 0, s = tv.size(); i < s; ++i) { - lp->insert_after(sh.create_copy_mov(sv[i], tv[i])); - lp = lp->next; - } - - if (call_fs) { - for (unsigned i = 0, cnt = tv.size(); i < cnt; ++i) { - value *v = tv[i]; - value *s = sv[i]; - if (!v) - continue; - - v->flags |= VLF_PIN_REG | VLF_PIN_CHAN; - s->flags &= 
~(VLF_PIN_REG | VLF_PIN_CHAN); - sel_chan sel; - - if (s->is_rel()) { - assert(s->rel->is_const()); - sel = sel_chan(s->select.sel() + - s->rel->get_const_value().u, - s->select.chan()); - } else - sel = s->select; - - v->gpr = v->pin_gpr = sel; - v->fix(); - } - } else { - c = sh.coal.create_constraint(CK_SAME_REG); - c->values = tv; - c->update_values(); - } - } - } -} - -void ra_init::add_prev_chan(unsigned chan) { - prev_chans = (prev_chans << 4) | (1 << chan); -} - -unsigned ra_init::get_preferable_chan_mask() { - unsigned i, used_chans = 0; - unsigned chans = prev_chans; - - for (i = 0; i < ra_tune; ++i) { - used_chans |= chans; - chans >>= 4; - } - - return (~used_chans) & 0xF; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp deleted file mode 100644 index 72dbb81..0000000 --- a/src/gallium/drivers/r600/sb/sb_sched.cpp +++ /dev/null @@ -1,2210 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#define PSC_DEBUG 0 - -#if PSC_DEBUG -#define PSC_DUMP(a) do { a } while (0) -#else -#define PSC_DUMP(a) -#endif - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" -#include "sb_sched.h" -#include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1 - -namespace r600_sb { - -rp_kcache_tracker::rp_kcache_tracker(shader &sh) : rp(), uc(), - // FIXME: for now we'll use "two const pairs" limit for r600, same as - // for other chips, otherwise additional check in alu_group_tracker is - // required to make sure that all 4 consts in the group fit into 2 - // kcache sets - sel_count(2) {} - -bool rp_kcache_tracker::try_reserve(sel_chan r) { - unsigned sel = kc_sel(r); - - for (unsigned i = 0; i < sel_count; ++i) { - if (rp[i] == 0) { - rp[i] = sel; - ++uc[i]; - return true; - } - if (rp[i] == sel) { - ++uc[i]; - return true; - } - } - return false; -} - -bool rp_kcache_tracker::try_reserve(node* n) { - bool need_unreserve = false; - vvec::iterator I(n->src.begin()), E(n->src.end()); - - for (; I != E; ++I) { - value *v = *I; - if (v->is_kcache()) { - if (!try_reserve(v->select)) - break; - else - need_unreserve = true; - } - } - if (I == E) - return true; - - if (need_unreserve && I != n->src.begin()) { - do { - --I; - value *v =*I; - if (v->is_kcache()) - unreserve(v->select); - } while (I != n->src.begin()); - } - return false; -} - -inline -void rp_kcache_tracker::unreserve(node* n) { - vvec::iterator I(n->src.begin()), E(n->src.end()); - for (; I != E; ++I) { - value *v = *I; - if (v->is_kcache()) - unreserve(v->select); - } -} - -void rp_kcache_tracker::unreserve(sel_chan r) { - unsigned sel = kc_sel(r); - - for (unsigned i = 0; i < sel_count; ++i) - if 
(rp[i] == sel) { - if (--uc[i] == 0) - rp[i] = 0; - return; - } - assert(0); - return; -} - -bool literal_tracker::try_reserve(alu_node* n) { - bool need_unreserve = false; - - vvec::iterator I(n->src.begin()), E(n->src.end()); - - for (; I != E; ++I) { - value *v = *I; - if (v->is_literal()) { - if (!try_reserve(v->literal_value)) - break; - else - need_unreserve = true; - } - } - if (I == E) - return true; - - if (need_unreserve && I != n->src.begin()) { - do { - --I; - value *v =*I; - if (v->is_literal()) - unreserve(v->literal_value); - } while (I != n->src.begin()); - } - return false; -} - -void literal_tracker::unreserve(alu_node* n) { - unsigned nsrc = n->bc.op_ptr->src_count, i; - - for (i = 0; i < nsrc; ++i) { - value *v = n->src[i]; - if (v->is_literal()) - unreserve(v->literal_value); - } -} - -bool literal_tracker::try_reserve(literal l) { - - PSC_DUMP( sblog << "literal reserve " << l.u << " " << l.f << "\n"; ); - - for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) { - if (lt[i] == 0) { - lt[i] = l; - ++uc[i]; - PSC_DUMP( sblog << " reserved new uc = " << uc[i] << "\n"; ); - return true; - } else if (lt[i] == l) { - ++uc[i]; - PSC_DUMP( sblog << " reserved uc = " << uc[i] << "\n"; ); - return true; - } - } - PSC_DUMP( sblog << " failed to reserve literal\n"; ); - return false; -} - -void literal_tracker::unreserve(literal l) { - - PSC_DUMP( sblog << "literal unreserve " << l.u << " " << l.f << "\n"; ); - - for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) { - if (lt[i] == l) { - if (--uc[i] == 0) - lt[i] = 0; - return; - } - } - assert(0); - return; -} - -static inline unsigned bs_cycle_vector(unsigned bs, unsigned src) { - static const unsigned swz[VEC_NUM][3] = { - {0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0} - }; - assert(bs < VEC_NUM && src < 3); - return swz[bs][src]; -} - -static inline unsigned bs_cycle_scalar(unsigned bs, unsigned src) { - static const unsigned swz[SCL_NUM][3] = { - {2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1} - 
}; - - if (bs >= SCL_NUM || src >= 3) { - // this prevents gcc warning "array subscript is above array bounds" - // AFAICS we should never hit this path - abort(); - } - return swz[bs][src]; -} - -static inline unsigned bs_cycle(bool trans, unsigned bs, unsigned src) { - return trans ? bs_cycle_scalar(bs, src) : bs_cycle_vector(bs, src); -} - -inline -bool rp_gpr_tracker::try_reserve(unsigned cycle, unsigned sel, unsigned chan) { - ++sel; - if (rp[cycle][chan] == 0) { - rp[cycle][chan] = sel; - ++uc[cycle][chan]; - return true; - } else if (rp[cycle][chan] == sel) { - ++uc[cycle][chan]; - return true; - } - return false; -} - -inline -void rp_gpr_tracker::unreserve(alu_node* n) { - unsigned nsrc = n->bc.op_ptr->src_count, i; - unsigned trans = n->bc.slot == SLOT_TRANS; - unsigned bs = n->bc.bank_swizzle; - unsigned opt = !trans - && n->bc.src[0].sel == n->bc.src[1].sel - && n->bc.src[0].chan == n->bc.src[1].chan; - - for (i = 0; i < nsrc; ++i) { - value *v = n->src[i]; - if (v->is_readonly() || v->is_undef()) - continue; - if (i == 1 && opt) - continue; - unsigned cycle = bs_cycle(trans, bs, i); - unreserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan); - } -} - -inline -void rp_gpr_tracker::unreserve(unsigned cycle, unsigned sel, unsigned chan) { - ++sel; - assert(rp[cycle][chan] == sel && uc[cycle][chan]); - if (--uc[cycle][chan] == 0) - rp[cycle][chan] = 0; -} - -inline -bool rp_gpr_tracker::try_reserve(alu_node* n) { - unsigned nsrc = n->bc.op_ptr->src_count, i; - unsigned trans = n->bc.slot == SLOT_TRANS; - unsigned bs = n->bc.bank_swizzle; - unsigned opt = !trans && nsrc >= 2 && - n->src[0] == n->src[1]; - - bool need_unreserve = false; - unsigned const_count = 0, min_gpr_cycle = 3; - - for (i = 0; i < nsrc; ++i) { - value *v = n->src[i]; - if (v->is_readonly() || v->is_undef()) { - const_count++; - if (trans && const_count == 3) - break; - } else { - if (i == 1 && opt) - continue; - - unsigned cycle = bs_cycle(trans, bs, i); - - if (trans && cycle < 
min_gpr_cycle) - min_gpr_cycle = cycle; - - if (const_count && cycle < const_count && trans) - break; - - if (!try_reserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan)) - break; - else - need_unreserve = true; - } - } - - if ((i == nsrc) && (min_gpr_cycle + 1 > const_count)) - return true; - - if (need_unreserve && i--) { - do { - value *v = n->src[i]; - if (!v->is_readonly() && !v->is_undef()) { - if (i == 1 && opt) - continue; - unreserve(bs_cycle(trans, bs, i), n->bc.src[i].sel, - n->bc.src[i].chan); - } - } while (i--); - } - return false; -} - -alu_group_tracker::alu_group_tracker(shader &sh) - : sh(sh), kc(sh), - gpr(), lt(), slots(), - max_slots(sh.get_ctx().is_cayman() ? 4 : 5), - has_mova(), uses_ar(), has_predset(), has_kill(), - updates_exec_mask(), consumes_lds_oqa(), produces_lds_oqa(), chan_count(), interp_param(), next_id() { - - available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F; -} - -inline -sel_chan alu_group_tracker::get_value_id(value* v) { - unsigned &id = vmap[v]; - if (!id) - id = ++next_id; - return sel_chan(id, v->get_final_chan()); -} - -inline -void alu_group_tracker::assign_slot(unsigned slot, alu_node* n) { - update_flags(n); - slots[slot] = n; - available_slots &= ~(1 << slot); - - unsigned param = n->interp_param(); - - if (param) { - assert(!interp_param || interp_param == param); - interp_param = param; - } -} - - -void alu_group_tracker::discard_all_slots(container_node &removed_nodes) { - PSC_DUMP( sblog << "agt::discard_all_slots\n"; ); - discard_slots(~available_slots & ((1 << max_slots) - 1), removed_nodes); -} - -void alu_group_tracker::discard_slots(unsigned slot_mask, - container_node &removed_nodes) { - - PSC_DUMP( - sblog << "discard_slots : packed_ops : " - << (unsigned)packed_ops.size() << "\n"; - ); - - for (node_vec::iterator N, I = packed_ops.begin(); - I != packed_ops.end(); I = N) { - N = I; ++N; - - alu_packed_node *n = static_cast(*I); - unsigned pslots = n->get_slot_mask(); - - PSC_DUMP( - sblog << 
"discard_slots : packed slot_mask : " << pslots << "\n"; - ); - - if (pslots & slot_mask) { - - PSC_DUMP( - sblog << "discard_slots : discarding packed...\n"; - ); - - removed_nodes.push_back(n); - slot_mask &= ~pslots; - N = packed_ops.erase(I); - available_slots |= pslots; - for (unsigned k = 0; k < max_slots; ++k) { - if (pslots & (1 << k)) - slots[k] = NULL; - } - } - } - - for (unsigned slot = 0; slot < max_slots; ++slot) { - unsigned slot_bit = 1 << slot; - - if (slot_mask & slot_bit) { - assert(!(available_slots & slot_bit)); - assert(slots[slot]); - - assert(!(slots[slot]->bc.slot_flags & AF_4SLOT)); - - PSC_DUMP( - sblog << "discarding slot " << slot << " : "; - dump::dump_op(slots[slot]); - sblog << "\n"; - ); - - removed_nodes.push_back(slots[slot]); - slots[slot] = NULL; - available_slots |= slot_bit; - } - } - - alu_node *t = slots[4]; - if (t && (t->bc.slot_flags & AF_V)) { - unsigned chan = t->bc.dst_chan; - if (!slots[chan]) { - PSC_DUMP( - sblog << "moving "; - dump::dump_op(t); - sblog << " from trans slot to free slot " << chan << "\n"; - ); - - slots[chan] = t; - slots[4] = NULL; - t->bc.slot = chan; - } - } - - reinit(); -} - -alu_group_node* alu_group_tracker::emit() { - - alu_group_node *g = sh.create_alu_group(); - - lt.init_group_literals(g); - - for (unsigned i = 0; i < max_slots; ++i) { - alu_node *n = slots[i]; - if (n) { - g->push_back(n); - } - } - return g; -} - -bool alu_group_tracker::try_reserve(alu_node* n) { - unsigned nsrc = n->bc.op_ptr->src_count; - unsigned slot = n->bc.slot; - bool trans = slot == 4; - - if (slots[slot]) - return false; - - unsigned flags = n->bc.op_ptr->flags; - - unsigned param = n->interp_param(); - - if (param && interp_param && interp_param != param) - return false; - - if ((flags & AF_KILL) && has_predset) - return false; - if ((flags & AF_ANY_PRED) && (has_kill || has_predset)) - return false; - if ((flags & AF_MOVA) && (has_mova || uses_ar)) - return false; - - if (n->uses_ar() && has_mova) - return 
false; - - if (consumes_lds_oqa) - return false; - if (n->consumes_lds_oq() && available_slots != (sh.get_ctx().has_trans ? 0x1F : 0x0F)) - return false; - for (unsigned i = 0; i < nsrc; ++i) { - - unsigned last_id = next_id; - - value *v = n->src[i]; - if (!v->is_any_gpr() && !v->is_rel()) - continue; - sel_chan vid = get_value_id(n->src[i]); - - if (vid > last_id && chan_count[vid.chan()] == 3) { - return false; - } - - n->bc.src[i].sel = vid.sel(); - n->bc.src[i].chan = vid.chan(); - } - - if (!lt.try_reserve(n)) - return false; - - if (!kc.try_reserve(n)) { - lt.unreserve(n); - return false; - } - - unsigned fbs = n->forced_bank_swizzle(); - - n->bc.bank_swizzle = 0; - - if (!trans && fbs) - n->bc.bank_swizzle = VEC_210; - - if (gpr.try_reserve(n)) { - assign_slot(slot, n); - return true; - } - - if (!fbs) { - unsigned swz_num = trans ? SCL_NUM : VEC_NUM; - for (unsigned bs = 0; bs < swz_num; ++bs) { - n->bc.bank_swizzle = bs; - if (gpr.try_reserve(n)) { - assign_slot(slot, n); - return true; - } - } - } - - gpr.reset(); - - slots[slot] = n; - UNUSED unsigned forced_swz_slots = 0; - int first_slot = ~0, first_nf = ~0, last_slot = ~0; - unsigned save_bs[5]; - - for (unsigned i = 0; i < max_slots; ++i) { - alu_node *a = slots[i]; - if (a) { - if (first_slot == ~0) - first_slot = i; - last_slot = i; - save_bs[i] = a->bc.bank_swizzle; - if (a->forced_bank_swizzle()) { - assert(i != SLOT_TRANS); - forced_swz_slots |= (1 << i); - a->bc.bank_swizzle = VEC_210; - if (!gpr.try_reserve(a)) - assert(!"internal reservation error"); - } else { - if (first_nf == ~0) - first_nf = i; - - a->bc.bank_swizzle = 0; - } - } - } - - if (first_nf == ~0) { - assign_slot(slot, n); - return true; - } - - assert(first_slot != ~0 && last_slot != ~0); - - // silence "array subscript is above array bounds" with gcc 4.8 - if (last_slot >= 5) - abort(); - - int i = first_nf; - alu_node *a = slots[i]; - bool backtrack = false; - - while (1) { - - PSC_DUMP( - sblog << " bs: trying s" << i << " 
bs:" << a->bc.bank_swizzle - << " bt:" << backtrack << "\n"; - ); - - if (!backtrack && gpr.try_reserve(a)) { - PSC_DUMP( - sblog << " bs: reserved s" << i << " bs:" << a->bc.bank_swizzle - << "\n"; - ); - - while ((++i <= last_slot) && !slots[i]); - if (i <= last_slot) - a = slots[i]; - else - break; - } else { - bool itrans = i == SLOT_TRANS; - unsigned max_swz = itrans ? SCL_221 : VEC_210; - - if (a->bc.bank_swizzle < max_swz) { - ++a->bc.bank_swizzle; - - PSC_DUMP( - sblog << " bs: inc s" << i << " bs:" << a->bc.bank_swizzle - << "\n"; - ); - - } else { - - a->bc.bank_swizzle = 0; - while ((--i >= first_nf) && !slots[i]); - if (i < first_nf) - break; - a = slots[i]; - PSC_DUMP( - sblog << " bs: unreserve s" << i << " bs:" << a->bc.bank_swizzle - << "\n"; - ); - gpr.unreserve(a); - backtrack = true; - - continue; - } - } - backtrack = false; - } - - if (i == last_slot + 1) { - assign_slot(slot, n); - return true; - } - - // reservation failed, restore previous state - slots[slot] = NULL; - gpr.reset(); - for (unsigned i = 0; i < max_slots; ++i) { - alu_node *a = slots[i]; - if (a) { - a->bc.bank_swizzle = save_bs[i]; - ASSERTED bool b = gpr.try_reserve(a); - assert(b); - } - } - - kc.unreserve(n); - lt.unreserve(n); - return false; -} - -bool alu_group_tracker::try_reserve(alu_packed_node* p) { - bool need_unreserve = false; - node_iterator I(p->begin()), E(p->end()); - - for (; I != E; ++I) { - alu_node *n = static_cast(*I); - if (!try_reserve(n)) - break; - else - need_unreserve = true; - } - - if (I == E) { - packed_ops.push_back(p); - return true; - } - - if (need_unreserve) { - while (--I != E) { - alu_node *n = static_cast(*I); - slots[n->bc.slot] = NULL; - } - reinit(); - } - return false; -} - -void alu_group_tracker::reinit() { - alu_node * s[5]; - memcpy(s, slots, sizeof(slots)); - - reset(true); - - for (int i = max_slots - 1; i >= 0; --i) { - if (s[i] && !try_reserve(s[i])) { - sblog << "alu_group_tracker: reinit error on slot " << i << "\n"; - for 
(unsigned i = 0; i < max_slots; ++i) { - sblog << " slot " << i << " : "; - if (s[i]) - dump::dump_op(s[i]); - - sblog << "\n"; - } - assert(!"alu_group_tracker: reinit error"); - } - } -} - -void alu_group_tracker::reset(bool keep_packed) { - kc.reset(); - gpr.reset(); - lt.reset(); - memset(slots, 0, sizeof(slots)); - vmap.clear(); - next_id = 0; - produces_lds_oqa = 0; - consumes_lds_oqa = 0; - has_mova = false; - uses_ar = false; - has_predset = false; - has_kill = false; - updates_exec_mask = false; - available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F; - interp_param = 0; - - chan_count[0] = 0; - chan_count[1] = 0; - chan_count[2] = 0; - chan_count[3] = 0; - - if (!keep_packed) - packed_ops.clear(); -} - -void alu_group_tracker::update_flags(alu_node* n) { - unsigned flags = n->bc.op_ptr->flags; - has_kill |= (flags & AF_KILL); - has_mova |= (flags & AF_MOVA); - has_predset |= (flags & AF_ANY_PRED); - uses_ar |= n->uses_ar(); - consumes_lds_oqa |= n->consumes_lds_oq(); - produces_lds_oqa |= n->produces_lds_oq(); - if (flags & AF_ANY_PRED) { - if (n->dst[2] != NULL) - updates_exec_mask = true; - } -} - -int post_scheduler::run() { - return run_on(sh.root) ? 
0 : 1; -} - -bool post_scheduler::run_on(container_node* n) { - int r = true; - for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) { - if (I->is_container()) { - if (I->subtype == NST_BB) { - bb_node* bb = static_cast(*I); - r = schedule_bb(bb); - } else { - r = run_on(static_cast(*I)); - } - if (!r) - break; - } - } - return r; -} - -void post_scheduler::init_uc_val(container_node *c, value *v) { - node *d = v->any_def(); - if (d && d->parent == c) - ++ucm[d]; -} - -void post_scheduler::init_uc_vec(container_node *c, vvec &vv, bool src) { - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v || v->is_readonly()) - continue; - - if (v->is_rel()) { - init_uc_val(c, v->rel); - init_uc_vec(c, v->muse, true); - } if (src) { - init_uc_val(c, v); - } - } -} - -unsigned post_scheduler::init_ucm(container_node *c, node *n) { - init_uc_vec(c, n->src, true); - init_uc_vec(c, n->dst, false); - - uc_map::iterator F = ucm.find(n); - return F == ucm.end() ? 
0 : F->second; -} - -bool post_scheduler::schedule_bb(bb_node* bb) { - PSC_DUMP( - sblog << "scheduling BB " << bb->id << "\n"; - if (!pending.empty()) - dump::dump_op_list(&pending); - ); - - assert(pending.empty()); - assert(bb_pending.empty()); - assert(ready.empty()); - - bb_pending.append_from(bb); - cur_bb = bb; - - node *n; - - while ((n = bb_pending.back())) { - - PSC_DUMP( - sblog << "post_sched_bb "; - dump::dump_op(n); - sblog << "\n"; - ); - - // May require emitting ALU ops to load index registers - if (n->is_fetch_clause()) { - n->remove(); - process_fetch(static_cast(n)); - continue; - } - - if (n->is_alu_clause()) { - n->remove(); - bool r = process_alu(static_cast(n)); - if (r) - continue; - return false; - } - - n->remove(); - bb->push_front(n); - } - - this->cur_bb = NULL; - return true; -} - -void post_scheduler::init_regmap() { - - regmap.clear(); - - PSC_DUMP( - sblog << "init_regmap: live: "; - dump::dump_set(sh, live); - sblog << "\n"; - ); - - for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) { - value *v = *I; - assert(v); - if (!v->is_sgpr() || !v->is_prealloc()) - continue; - - sel_chan r = v->gpr; - - PSC_DUMP( - sblog << "init_regmap: " << r << " <= "; - dump::dump_val(v); - sblog << "\n"; - ); - - assert(r); - regmap[r] = v; - } -} - -static alu_node *create_set_idx(shader &sh, unsigned ar_idx) { - alu_node *a = sh.create_alu(); - - assert(ar_idx == V_SQ_CF_INDEX_0 || ar_idx == V_SQ_CF_INDEX_1); - if (ar_idx == V_SQ_CF_INDEX_0) - a->bc.set_op(ALU_OP0_SET_CF_IDX0); - else - a->bc.set_op(ALU_OP0_SET_CF_IDX1); - a->bc.slot = SLOT_X; - a->dst.resize(1); // Dummy needed for recolor - - PSC_DUMP( - sblog << "created IDX load: "; - dump::dump_op(a); - sblog << "\n"; - ); - - return a; -} - -void post_scheduler::load_index_register(value *v, unsigned ar_idx) -{ - alu.reset(); - - if (!sh.get_ctx().is_cayman()) { - // Evergreen has to first load address register, then use CF_SET_IDX0/1 - alu_group_tracker &rt = 
alu.grp(); - alu_node *set_idx = create_set_idx(sh, ar_idx); - if (!rt.try_reserve(set_idx)) { - sblog << "can't emit SET_CF_IDX"; - dump::dump_op(set_idx); - sblog << "\n"; - } - process_group(); - - if (!alu.check_clause_limits()) { - // Can't happen since clause only contains MOVA/CF_SET_IDX0/1 - } - alu.emit_group(); - } - - alu_group_tracker &rt = alu.grp(); - alu_node *a = alu.create_ar_load(v, ar_idx == V_SQ_CF_INDEX_1 ? SEL_Z : SEL_Y); - - if (!rt.try_reserve(a)) { - sblog << "can't emit AR load : "; - dump::dump_op(a); - sblog << "\n"; - } - - process_group(); - - if (!alu.check_clause_limits()) { - // Can't happen since clause only contains MOVA/CF_SET_IDX0/1 - } - - alu.emit_group(); - alu.emit_clause(cur_bb); -} - -void post_scheduler::process_fetch(container_node *c) { - if (c->empty()) - return; - - for (node_iterator N, I = c->begin(), E = c->end(); I != E; I = N) { - N = I; - ++N; - - node *n = *I; - - fetch_node *f = static_cast(n); - - PSC_DUMP( - sblog << "process_tex "; - dump::dump_op(n); - sblog << " "; - ); - - // TODO: If same values used can avoid reloading index register - if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE || - f->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) { - unsigned index_mode = f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ? 
- f->bc.sampler_index_mode : f->bc.resource_index_mode; - - // Currently require prior opt passes to use one TEX per indexed op - assert(f->parent->count() == 1); - - value *v = f->src.back(); // Last src is index offset - assert(v); - - cur_bb->push_front(c); - - load_index_register(v, index_mode); - f->src.pop_back(); // Don't need index value any more - - return; - } - } - - cur_bb->push_front(c); -} - -bool post_scheduler::process_alu(container_node *c) { - - if (c->empty()) - return true; - - ucm.clear(); - alu.reset(); - - live = c->live_after; - - init_globals(c->live_after, true); - init_globals(c->live_before, true); - - init_regmap(); - - update_local_interferences(); - - for (node_riterator N, I = c->rbegin(), E = c->rend(); I != E; I = N) { - N = I; - ++N; - - node *n = *I; - unsigned uc = init_ucm(c, n); - - PSC_DUMP( - sblog << "process_alu uc=" << uc << " "; - dump::dump_op(n); - sblog << " "; - ); - - if (uc) { - n->remove(); - - pending.push_back(n); - PSC_DUMP( sblog << "pending\n"; ); - } else { - release_op(n); - } - } - - return schedule_alu(c); -} - -void post_scheduler::update_local_interferences() { - - PSC_DUMP( - sblog << "update_local_interferences : "; - dump::dump_set(sh, live); - sblog << "\n"; - ); - - - for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) { - value *v = *I; - if (v->is_prealloc()) - continue; - - v->interferences.add_set(live); - } -} - -void post_scheduler::update_live_src_vec(vvec &vv, val_set *born, bool src) { - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - - if (!v) - continue; - - if (src && v->is_any_gpr()) { - if (live.add_val(v)) { - if (!v->is_prealloc()) { - if (!cleared_interf.contains(v)) { - PSC_DUMP( - sblog << "clearing interferences for " << *v << "\n"; - ); - v->interferences.clear(); - cleared_interf.add_val(v); - } - } - if (born) - born->add_val(v); - } - } else if (v->is_rel()) { - if (!v->rel->is_any_gpr()) - live.add_val(v->rel); - 
update_live_src_vec(v->muse, born, true); - } - } -} - -void post_scheduler::update_live_dst_vec(vvec &vv) { - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v) - continue; - - if (v->is_rel()) { - update_live_dst_vec(v->mdef); - } else if (v->is_any_gpr()) { - if (!live.remove_val(v)) { - PSC_DUMP( - sblog << "failed to remove "; - dump::dump_val(v); - sblog << " from live : "; - dump::dump_set(sh, live); - sblog << "\n"; - ); - } - } - } -} - -void post_scheduler::update_live(node *n, val_set *born) { - update_live_dst_vec(n->dst); - update_live_src_vec(n->src, born, true); - update_live_src_vec(n->dst, born, false); -} - -void post_scheduler::process_group() { - alu_group_tracker &rt = alu.grp(); - - val_set vals_born; - - recolor_locals(); - - PSC_DUMP( - sblog << "process_group: live_before : "; - dump::dump_set(sh, live); - sblog << "\n"; - ); - - for (unsigned s = 0; s < ctx.num_slots; ++s) { - alu_node *n = rt.slot(s); - if (!n) - continue; - - update_live(n, &vals_born); - } - - PSC_DUMP( - sblog << "process_group: live_after : "; - dump::dump_set(sh, live); - sblog << "\n"; - ); - - update_local_interferences(); - - for (unsigned i = 0; i < 5; ++i) { - node *n = rt.slot(i); - if (n && !n->is_mova()) { - release_src_values(n); - } - } -} - -void post_scheduler::init_globals(val_set &s, bool prealloc) { - - PSC_DUMP( - sblog << "init_globals: "; - dump::dump_set(sh, s); - sblog << "\n"; - ); - - for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) { - value *v = *I; - if (v->is_sgpr() && !v->is_global()) { - v->set_global(); - - if (prealloc && v->is_fixed()) { - v->set_prealloc(); - } - } - } -} - -void post_scheduler::emit_index_registers() { - for (unsigned i = 0; i < 2; i++) { - if (alu.current_idx[i]) { - regmap = prev_regmap; - alu.discard_current_group(); - - load_index_register(alu.current_idx[i], KC_INDEX_0 + i); - alu.current_idx[i] = NULL; - } - } -} - -void post_scheduler::emit_clause() { 
- - if (alu.current_ar) { - emit_load_ar(); - process_group(); - if (!alu.check_clause_limits()) { - // Can't happen since clause only contains MOVA/CF_SET_IDX0/1 - } - alu.emit_group(); - } - - if (!alu.is_empty()) { - alu.emit_clause(cur_bb); - } - - emit_index_registers(); -} - -bool post_scheduler::schedule_alu(container_node *c) { - - assert(!ready.empty() || !ready_copies.empty()); - - /* This number is rather arbitrary, important is that the scheduler has - * more than one try to create an instruction group - */ - int improving = 10; - int last_pending = pending.count(); - while (improving > 0) { - prev_regmap = regmap; - if (!prepare_alu_group()) { - - int new_pending = pending.count(); - if ((new_pending < last_pending) || (last_pending == 0)) - improving = 10; - else - --improving; - - last_pending = new_pending; - - if (alu.current_idx[0] || alu.current_idx[1]) { - regmap = prev_regmap; - emit_clause(); - init_globals(live, false); - - continue; - } - - if (alu.current_ar) { - emit_load_ar(); - continue; - } else - break; - } - - if (!alu.check_clause_limits()) { - regmap = prev_regmap; - emit_clause(); - init_globals(live, false); - - continue; - } - - process_group(); - alu.emit_group(); - }; - - if (!alu.is_empty()) { - emit_clause(); - } - - if (!ready.empty()) { - sblog << "##post_scheduler: unscheduled ready instructions :"; - dump::dump_op_list(&ready); - } - - if (!pending.empty()) { - sblog << "##post_scheduler: unscheduled pending instructions :"; - dump::dump_op_list(&pending); - } - return pending.empty() && ready.empty() && improving != 0; -} - -void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) { - unsigned chan = v->gpr.chan(); - - for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); - I != E; ++I) { - value *vi = *I; - sel_chan gpr = vi->get_final_gpr(); - - if (vi->is_any_gpr() && gpr && vi != v && - (!v->chunk || v->chunk != vi->chunk) && - vi->is_fixed() && gpr.chan() == chan) { - - unsigned r = gpr.sel(); 
- - PSC_DUMP( - sblog << "\tadd_interferences: " << *vi << "\n"; - ); - - if (rb.size() <= r) - rb.resize(r + 32); - rb.set(r); - } - } -} - -void post_scheduler::set_color_local_val(value *v, sel_chan color) { - v->gpr = color; - - PSC_DUMP( - sblog << " recolored: "; - dump::dump_val(v); - sblog << "\n"; - ); -} - -void post_scheduler::set_color_local(value *v, sel_chan color) { - if (v->chunk) { - vvec &vv = v->chunk->values; - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v2 =*I; - set_color_local_val(v2, color); - } - v->chunk->fix(); - } else { - set_color_local_val(v, color); - v->fix(); - } -} - -bool post_scheduler::recolor_local(value *v) { - - sb_bitset rb; - - assert(v->is_sgpr()); - assert(!v->is_prealloc()); - assert(v->gpr); - - unsigned chan = v->gpr.chan(); - - PSC_DUMP( - sblog << "recolor_local: "; - dump::dump_val(v); - sblog << " interferences: "; - dump::dump_set(sh, v->interferences); - sblog << "\n"; - if (v->chunk) { - sblog << " in chunk: "; - coalescer::dump_chunk(v->chunk); - sblog << "\n"; - } - ); - - if (v->chunk) { - for (vvec::iterator I = v->chunk->values.begin(), - E = v->chunk->values.end(); I != E; ++I) { - value *v2 = *I; - - PSC_DUMP( sblog << " add_interferences for " << *v2 << " :\n"; ); - - add_interferences(v, rb, v2->interferences); - } - } else { - add_interferences(v, rb, v->interferences); - } - - PSC_DUMP( - unsigned sz = rb.size(); - sblog << "registers bits: " << sz; - for (unsigned r = 0; r < sz; ++r) { - if ((r & 7) == 0) - sblog << "\n " << r << " "; - sblog << (rb.get(r) ? 1 : 0); - } - ); - - bool no_temp_gprs = v->is_global(); - unsigned rs, re, pass = no_temp_gprs ? 
1 : 0; - - while (pass < 2) { - - if (pass == 0) { - rs = sh.first_temp_gpr(); - re = MAX_GPR; - } else { - rs = 0; - re = sh.num_nontemp_gpr(); - } - - for (unsigned reg = rs; reg < re; ++reg) { - if (reg >= rb.size() || !rb.get(reg)) { - // color found - set_color_local(v, sel_chan(reg, chan)); - return true; - } - } - ++pass; - } - - assert(!"recolor_local failed"); - return true; -} - -void post_scheduler::emit_load_ar() { - - regmap = prev_regmap; - alu.discard_current_group(); - - alu_group_tracker &rt = alu.grp(); - alu_node *a = alu.create_ar_load(alu.current_ar, SEL_X); - - if (!rt.try_reserve(a)) { - sblog << "can't emit AR load : "; - dump::dump_op(a); - sblog << "\n"; - } - - alu.current_ar = 0; -} - -bool post_scheduler::unmap_dst_val(value *d) { - - if (d == alu.current_ar) { - emit_load_ar(); - return false; - } - - if (d->is_prealloc()) { - sel_chan gpr = d->get_final_gpr(); - rv_map::iterator F = regmap.find(gpr); - value *c = NULL; - if (F != regmap.end()) - c = F->second; - - if (c && c!=d && (!c->chunk || c->chunk != d->chunk)) { - PSC_DUMP( - sblog << "dst value conflict : "; - dump::dump_val(d); - sblog << " regmap contains "; - dump::dump_val(c); - sblog << "\n"; - ); - assert(!"scheduler error"); - return false; - } else if (c) { - regmap.erase(F); - } - } - return true; -} - -bool post_scheduler::unmap_dst(alu_node *n) { - value *d = n->dst.empty() ? 
NULL : n->dst[0]; - - if (!d) - return true; - - if (!d->is_rel()) { - if (d && d->is_any_reg()) { - - if (d->is_AR()) { - if (alu.current_ar != d) { - sblog << "loading wrong ar value\n"; - assert(0); - } else { - alu.current_ar = NULL; - } - - } else if (d->is_any_gpr()) { - if (!unmap_dst_val(d)) - return false; - } - } - } else { - for (vvec::iterator I = d->mdef.begin(), E = d->mdef.end(); - I != E; ++I) { - d = *I; - if (!d) - continue; - - assert(d->is_any_gpr()); - - if (!unmap_dst_val(d)) - return false; - } - } - return true; -} - -bool post_scheduler::map_src_val(value *v) { - - if (!v->is_prealloc()) - return true; - - sel_chan gpr = v->get_final_gpr(); - rv_map::iterator F = regmap.find(gpr); - value *c = NULL; - if (F != regmap.end()) { - c = F->second; - if (!v->v_equal(c)) { - PSC_DUMP( - sblog << "can't map src value "; - dump::dump_val(v); - sblog << ", regmap contains "; - dump::dump_val(c); - sblog << "\n"; - ); - return false; - } - } else { - regmap.insert(std::make_pair(gpr, v)); - } - return true; -} - -bool post_scheduler::map_src_vec(vvec &vv, bool src) { - if (src) { - // Handle possible UBO indexing - bool ubo_indexing[2] = { false, false }; - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v) - continue; - - if (v->is_kcache()) { - unsigned index_mode = v->select.kcache_index_mode(); - if (index_mode == KC_INDEX_0 || index_mode == KC_INDEX_1) { - ubo_indexing[index_mode - KC_INDEX_0] = true; - } - } - } - - // idx values stored at end of src vec, see bc_parser::prepare_alu_group - for (unsigned i = 2; i != 0; i--) { - if (ubo_indexing[i-1]) { - // TODO: skip adding value to kcache reservation somehow, causes - // unnecessary group breaks and cache line locks - value *v = vv.back(); - if (alu.current_idx[i-1] && alu.current_idx[i-1] != v) { - PSC_DUMP( - sblog << "IDX" << i-1 << " already set to " << - *alu.current_idx[i-1] << ", trying to set " << *v << "\n"; - ); - return false; - } - - 
alu.current_idx[i-1] = v; - PSC_DUMP(sblog << "IDX" << i-1 << " set to " << *v << "\n";); - } - } - } - - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v) - continue; - - if ((!v->is_any_gpr() || !v->is_fixed()) && !v->is_rel()) - continue; - - if (v->is_rel()) { - value *rel = v->rel; - assert(rel); - - if (!rel->is_const()) { - if (!map_src_vec(v->muse, true)) - return false; - - if (rel != alu.current_ar) { - if (alu.current_ar) { - PSC_DUMP( - sblog << " current_AR is " << *alu.current_ar - << " trying to use " << *rel << "\n"; - ); - return false; - } - - alu.current_ar = rel; - - PSC_DUMP( - sblog << " new current_AR assigned: " << *alu.current_ar - << "\n"; - ); - } - } - - } else if (src) { - if (!map_src_val(v)) { - return false; - } - } - } - return true; -} - -bool post_scheduler::map_src(alu_node *n) { - if (!map_src_vec(n->dst, false)) - return false; - - if (!map_src_vec(n->src, true)) - return false; - - return true; -} - -void post_scheduler::dump_regmap() { - - sblog << "# REGMAP :\n"; - - for(rv_map::iterator I = regmap.begin(), E = regmap.end(); I != E; ++I) { - sblog << " # " << I->first << " => " << *(I->second) << "\n"; - } - - if (alu.current_ar) - sblog << " current_AR: " << *alu.current_ar << "\n"; - if (alu.current_pr) - sblog << " current_PR: " << *alu.current_pr << "\n"; - if (alu.current_idx[0]) - sblog << " current IDX0: " << *alu.current_idx[0] << "\n"; - if (alu.current_idx[1]) - sblog << " current IDX1: " << *alu.current_idx[1] << "\n"; -} - -void post_scheduler::recolor_locals() { - alu_group_tracker &rt = alu.grp(); - - for (unsigned s = 0; s < ctx.num_slots; ++s) { - alu_node *n = rt.slot(s); - if (n) { - value *d = n->dst[0]; - if (d && d->is_sgpr() && !d->is_prealloc()) { - recolor_local(d); - } - } - } -} - -// returns true if there are interferences -bool post_scheduler::check_interferences() { - - alu_group_tracker &rt = alu.grp(); - - unsigned interf_slots; - - bool discarded = 
false; - - PSC_DUMP( - sblog << "check_interferences: before: \n"; - dump_regmap(); - ); - - do { - - interf_slots = 0; - - for (unsigned s = 0; s < ctx.num_slots; ++s) { - alu_node *n = rt.slot(s); - if (n) { - if (!unmap_dst(n)) { - return true; - } - } - } - - for (unsigned s = 0; s < ctx.num_slots; ++s) { - alu_node *n = rt.slot(s); - if (n) { - if (!map_src(n)) { - interf_slots |= (1 << s); - } - } - } - - PSC_DUMP( - for (unsigned i = 0; i < 5; ++i) { - if (interf_slots & (1 << i)) { - sblog << "!!!!!! interf slot: " << i << " : "; - dump::dump_op(rt.slot(i)); - sblog << "\n"; - } - } - ); - - if (!interf_slots) - break; - - PSC_DUMP( sblog << "ci: discarding slots " << interf_slots << "\n"; ); - - rt.discard_slots(interf_slots, alu.conflict_nodes); - regmap = prev_regmap; - discarded = true; - - } while(1); - - PSC_DUMP( - sblog << "check_interferences: after: \n"; - dump_regmap(); - ); - - return discarded; -} - -// add instruction(s) (alu_node or contents of alu_packed_node) to current group -// returns the number of added instructions on success -unsigned post_scheduler::try_add_instruction(node *n) { - - alu_group_tracker &rt = alu.grp(); - - unsigned avail_slots = rt.avail_slots(); - - // Cannot schedule in same clause as instructions using this index value - if (!n->dst.empty() && n->dst[0] && - (n->dst[0] == alu.current_idx[0] || n->dst[0] == alu.current_idx[1])) { - PSC_DUMP(sblog << " CF_IDX source: " << *n->dst[0] << "\n";); - return 0; - } - - if (n->is_alu_packed()) { - alu_packed_node *p = static_cast(n); - unsigned slots = p->get_slot_mask(); - unsigned cnt = __builtin_popcount(slots); - - if ((slots & avail_slots) != slots) { - PSC_DUMP( sblog << " no slots \n"; ); - return 0; - } - - p->update_packed_items(ctx); - - if (!rt.try_reserve(p)) { - PSC_DUMP( sblog << " reservation failed \n"; ); - return 0; - } - - p->remove(); - return cnt; - - } else { - alu_node *a = static_cast(n); - value *d = a->dst.empty() ? 
NULL : a->dst[0]; - - if (d && d->is_special_reg()) { - assert((a->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access() || d->is_scratch()); - d = NULL; - } - - unsigned allowed_slots = ctx.alu_slots_mask(a->bc.op_ptr); - unsigned slot; - - allowed_slots &= avail_slots; - - if (!allowed_slots) - return 0; - - if (d) { - slot = d->get_final_chan(); - a->bc.dst_chan = slot; - allowed_slots &= (1 << slot) | 0x10; - } else { - if (a->bc.op_ptr->flags & AF_MOVA) { - if (a->bc.slot_flags & AF_V) - allowed_slots &= (1 << SLOT_X); - else - allowed_slots &= (1 << SLOT_TRANS); - } - } - - // FIXME workaround for some problems with MULADD in trans slot on r700, - // (is it really needed on r600?) - if ((a->bc.op == ALU_OP3_MULADD || a->bc.op == ALU_OP3_MULADD_IEEE) && - !ctx.is_egcm()) { - allowed_slots &= 0x0F; - } - - if (!allowed_slots) { - PSC_DUMP( sblog << " no suitable slots\n"; ); - return 0; - } - - slot = __builtin_ctz(allowed_slots); - a->bc.slot = slot; - - PSC_DUMP( sblog << "slot: " << slot << "\n"; ); - - if (!rt.try_reserve(a)) { - PSC_DUMP( sblog << " reservation failed\n"; ); - return 0; - } - - a->remove(); - return 1; - } -} - -bool post_scheduler::check_copy(node *n) { - if (!n->is_copy_mov()) - return false; - - value *s = n->src[0]; - value *d = n->dst[0]; - - if (!s->is_sgpr() || !d->is_sgpr()) - return false; - - if (!s->is_prealloc()) { - recolor_local(s); - - if (!s->chunk || s->chunk != d->chunk) - return false; - } - - if (s->gpr == d->gpr) { - - PSC_DUMP( - sblog << "check_copy: "; - dump::dump_op(n); - sblog << "\n"; - ); - - rv_map::iterator F = regmap.find(d->gpr); - bool gpr_free = (F == regmap.end()); - - if (d->is_prealloc()) { - if (gpr_free) { - PSC_DUMP( sblog << " copy not ready...\n";); - return true; - } - - value *rv = F->second; - if (rv != d && (!rv->chunk || rv->chunk != d->chunk)) { - PSC_DUMP( sblog << " copy not ready(2)...\n";); - return true; - } - - unmap_dst(static_cast(n)); - } - - 
if (s->is_prealloc() && !map_src_val(s)) - return true; - - update_live(n, NULL); - - release_src_values(n); - n->remove(); - PSC_DUMP( sblog << " copy coalesced...\n";); - return true; - } - return false; -} - -void post_scheduler::dump_group(alu_group_tracker &rt) { - for (unsigned i = 0; i < 5; ++i) { - node *n = rt.slot(i); - if (n) { - sblog << "slot " << i << " : "; - dump::dump_op(n); - sblog << "\n"; - } - } -} - -void post_scheduler::process_ready_copies() { - - node *last; - - do { - last = ready_copies.back(); - - for (node_iterator N, I = ready_copies.begin(), E = ready_copies.end(); - I != E; I = N) { - N = I; ++N; - - node *n = *I; - - if (!check_copy(n)) { - n->remove(); - ready.push_back(n); - } - } - } while (last != ready_copies.back()); - - update_local_interferences(); -} - - -bool post_scheduler::prepare_alu_group() { - - alu_group_tracker &rt = alu.grp(); - - unsigned i1 = 0; - - PSC_DUMP( - sblog << "prepare_alu_group: starting...\n"; - dump_group(rt); - ); - - ready.append_from(&alu.conflict_nodes); - - // FIXME rework this loop - - do { - - process_ready_copies(); - - ++i1; - - for (node_iterator N, I = ready.begin(), E = ready.end(); I != E; - I = N) { - N = I; ++N; - node *n = *I; - - PSC_DUMP( - sblog << "p_a_g: "; - dump::dump_op(n); - sblog << "\n"; - ); - - - unsigned cnt = try_add_instruction(n); - - if (!cnt) - continue; - - PSC_DUMP( - sblog << "current group:\n"; - dump_group(rt); - ); - - if (rt.inst_count() == ctx.num_slots) { - PSC_DUMP( sblog << " all slots used\n"; ); - break; - } - } - - if (!check_interferences()) - break; - - // don't try to add more instructions to the group with mova if this - // can lead to breaking clause slot count limit - we don't want mova to - // end up in the end of the new clause instead of beginning of the - // current clause. 
- if (rt.has_ar_load() && alu.total_slots() > 121) - break; - - if (rt.inst_count() && i1 > 50) - break; - - regmap = prev_regmap; - - } while (1); - - PSC_DUMP( - sblog << " prepare_alu_group done, " << rt.inst_count() - << " slot(s) \n"; - - sblog << "$$$$$$$$PAG i1=" << i1 - << " ready " << ready.count() - << " pending " << pending.count() - << " conflicting " << alu.conflict_nodes.count() - <<"\n"; - - ); - - return rt.inst_count(); -} - -void post_scheduler::release_src_values(node* n) { - release_src_vec(n->src, true); - release_src_vec(n->dst, false); -} - -void post_scheduler::release_op(node *n) { - PSC_DUMP( - sblog << "release_op "; - dump::dump_op(n); - sblog << "\n"; - ); - - n->remove(); - - if (n->is_copy_mov()) { - ready_copies.push_back(n); - } else if (n->is_mova() || n->is_pred_set()) { - ready.push_front(n); - } else { - ready.push_back(n); - } -} - -void post_scheduler::release_src_val(value *v) { - node *d = v->any_def(); - if (d) { - if (!--ucm[d]) - release_op(d); - } -} - -void post_scheduler::release_src_vec(vvec& vv, bool src) { - - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (!v || v->is_readonly()) - continue; - - if (v->is_rel()) { - release_src_val(v->rel); - release_src_vec(v->muse, true); - - } else if (src) { - release_src_val(v); - } - } -} - -void literal_tracker::reset() { - lt[0].u = 0; - lt[1].u = 0; - lt[2].u = 0; - lt[3].u = 0; - memset(uc, 0, sizeof(uc)); -} - -void rp_gpr_tracker::reset() { - memset(rp, 0, sizeof(rp)); - memset(uc, 0, sizeof(uc)); -} - -void rp_kcache_tracker::reset() { - memset(rp, 0, sizeof(rp)); - memset(uc, 0, sizeof(uc)); -} - -void alu_kcache_tracker::reset() { - memset(kc, 0, sizeof(kc)); - lines.clear(); -} - -void alu_clause_tracker::reset() { - group = 0; - slot_count = 0; - outstanding_lds_oqa_reads = 0; - grp0.reset(); - grp1.reset(); -} - -alu_clause_tracker::alu_clause_tracker(shader &sh) - : sh(sh), kt(sh.get_ctx().hw_class), slot_count(), - 
grp0(sh), grp1(sh), - group(), clause(), - push_exec_mask(), outstanding_lds_oqa_reads(), - current_ar(), current_pr(), current_idx() {} - -void alu_clause_tracker::emit_group() { - - assert(grp().inst_count()); - - alu_group_node *g = grp().emit(); - - if (grp().has_update_exec_mask()) { - assert(!push_exec_mask); - push_exec_mask = true; - } - - assert(g); - - if (!clause) { - clause = sh.create_clause(NST_ALU_CLAUSE); - } - - clause->push_front(g); - - outstanding_lds_oqa_reads += grp().get_consumes_lds_oqa(); - outstanding_lds_oqa_reads -= grp().get_produces_lds_oqa(); - slot_count += grp().slot_count(); - - new_group(); - - PSC_DUMP( sblog << " #### group emitted\n"; ); -} - -void alu_clause_tracker::emit_clause(container_node *c) { - assert(clause); - - kt.init_clause(clause->bc); - - assert(!outstanding_lds_oqa_reads); - assert(!current_ar); - assert(!current_pr); - - if (push_exec_mask) - clause->bc.set_op(CF_OP_ALU_PUSH_BEFORE); - - c->push_front(clause); - - clause = NULL; - push_exec_mask = false; - slot_count = 0; - kt.reset(); - - PSC_DUMP( sblog << "######### ALU clause emitted\n"; ); -} - -bool alu_clause_tracker::check_clause_limits() { - - alu_group_tracker > = grp(); - - unsigned slots = gt.slot_count(); - - // reserving slots to load AR and PR values - unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 
1 : 0); - // ...and index registers - reserve_slots += (current_idx[0] != NULL) + (current_idx[1] != NULL); - - if (gt.get_consumes_lds_oqa() && !outstanding_lds_oqa_reads) - reserve_slots += 60; - - if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots) - return false; - - if (!kt.try_reserve(gt)) - return false; - - return true; -} - -void alu_clause_tracker::new_group() { - group = !group; - grp().reset(); -} - -bool alu_clause_tracker::is_empty() { - return clause == NULL; -} - -void literal_tracker::init_group_literals(alu_group_node* g) { - - g->literals.clear(); - for (unsigned i = 0; i < 4; ++i) { - if (!lt[i]) - break; - - g->literals.push_back(lt[i]); - - PSC_DUMP( - sblog << "literal emitted: " << lt[i].f; - sblog.print_zw_hex(lt[i].u, 8); - sblog << " " << lt[i].i << "\n"; - ); - } -} - -bool alu_kcache_tracker::try_reserve(alu_group_tracker& gt) { - rp_kcache_tracker &kt = gt.kcache(); - - if (!kt.num_sels()) - return true; - - sb_set group_lines; - - ASSERTED unsigned nl = kt.get_lines(group_lines); - assert(nl); - - sb_set clause_lines(lines); - lines.add_set(group_lines); - - if (clause_lines.size() == lines.size()) - return true; - - if (update_kc()) - return true; - - lines = clause_lines; - - return false; -} - -unsigned rp_kcache_tracker::get_lines(kc_lines& lines) { - unsigned cnt = 0; - - for (unsigned i = 0; i < sel_count; ++i) { - unsigned line = rp[i] & 0x1fffffffu; - unsigned index_mode = rp[i] >> 29; - - if (!line) - return cnt; - - --line; - line = (sel_count == 2) ? 
line >> 5 : line >> 6; - line |= index_mode << 29; - - if (lines.insert(line).second) - ++cnt; - } - return cnt; -} - -bool alu_kcache_tracker::update_kc() { - unsigned c = 0; - - bc_kcache old_kc[4]; - memcpy(old_kc, kc, sizeof(kc)); - - for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) { - unsigned index_mode = *I >> 29; - unsigned line = *I & 0x1fffffffu; - unsigned bank = line >> 8; - - assert(index_mode <= KC_INDEX_INVALID); - line &= 0xFF; - - if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line) && - kc[c-1].index_mode == index_mode) - { - kc[c-1].mode = KC_LOCK_2; - } else { - if (c == max_kcs) { - memcpy(kc, old_kc, sizeof(kc)); - return false; - } - - kc[c].mode = KC_LOCK_1; - - kc[c].bank = bank; - kc[c].addr = line; - kc[c].index_mode = index_mode; - ++c; - } - } - return true; -} - -alu_node* alu_clause_tracker::create_ar_load(value *v, chan_select ar_channel) { - alu_node *a = sh.create_alu(); - - if (sh.get_ctx().uses_mova_gpr) { - a->bc.set_op(ALU_OP1_MOVA_GPR_INT); - a->bc.slot = SLOT_TRANS; - } else { - a->bc.set_op(ALU_OP1_MOVA_INT); - a->bc.slot = SLOT_X; - } - a->bc.dst_chan = ar_channel; - if (ar_channel != SEL_X && sh.get_ctx().is_cayman()) { - a->bc.dst_gpr = ar_channel == SEL_Y ? 
CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1; - } - - a->dst.resize(1); - a->src.push_back(v); - - PSC_DUMP( - sblog << "created AR load: "; - dump::dump_op(a); - sblog << "\n"; - ); - - return a; -} - -void alu_clause_tracker::discard_current_group() { - PSC_DUMP( sblog << "act::discard_current_group\n"; ); - grp().discard_all_slots(conflict_nodes); -} - -void rp_gpr_tracker::dump() { - sblog << "=== gpr_tracker dump:\n"; - for (int c = 0; c < 3; ++c) { - sblog << "cycle " << c << " "; - for (int h = 0; h < 4; ++h) { - sblog << rp[c][h] << ":" << uc[c][h] << " "; - } - sblog << "\n"; - } -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_sched.h b/src/gallium/drivers/r600/sb/sb_sched.h deleted file mode 100644 index 91a34e0..0000000 --- a/src/gallium/drivers/r600/sb/sb_sched.h +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#ifndef SB_SCHED_H_ -#define SB_SCHED_H_ - -namespace r600_sb { - -typedef sb_map uc_map; - -// resource trackers for scheduler -// rp = read port -// uc = use count - -typedef sb_set kc_lines; - -class rp_kcache_tracker { - unsigned rp[4]; - unsigned uc[4]; - const unsigned sel_count; - - unsigned kc_sel(sel_chan r) { - return sel_count == 4 ? (unsigned)r : ((r - 1) >> 1) + 1; - } - -public: - rp_kcache_tracker(shader &sh); - - bool try_reserve(node *n); - void unreserve(node *n); - - - bool try_reserve(sel_chan r); - void unreserve(sel_chan r); - - void reset(); - - unsigned num_sels() { return !!rp[0] + !!rp[1] + !!rp[2] + !!rp[3]; } - - unsigned get_lines(kc_lines &lines); -}; - -class literal_tracker { - literal lt[4]; - unsigned uc[4]; - -public: - literal_tracker() : lt(), uc() {} - - bool try_reserve(alu_node *n); - void unreserve(alu_node *n); - - bool try_reserve(literal l); - void unreserve(literal l); - - void reset(); - - unsigned count() { return !!uc[0] + !!uc[1] + !!uc[2] + !!uc[3]; } - - void init_group_literals(alu_group_node *g); - -}; - -class rp_gpr_tracker { - // rp[cycle][elem] - unsigned rp[3][4]; - unsigned uc[3][4]; - -public: - rp_gpr_tracker() : rp(), uc() {} - - bool try_reserve(alu_node *n); - void unreserve(alu_node *n); - - bool try_reserve(unsigned cycle, unsigned sel, unsigned chan); - void unreserve(unsigned cycle, unsigned sel, unsigned chan); - - void reset(); - - void dump(); -}; - -class alu_group_tracker { - - shader &sh; - - rp_kcache_tracker kc; - rp_gpr_tracker gpr; - literal_tracker lt; - - alu_node * slots[5]; - - unsigned available_slots; - - unsigned max_slots; - - typedef std::map value_index_map; - - 
value_index_map vmap; - - bool has_mova; - bool uses_ar; - bool has_predset; - bool has_kill; - bool updates_exec_mask; - - bool consumes_lds_oqa; - bool produces_lds_oqa; - unsigned chan_count[4]; - - // param index + 1 (0 means that group doesn't refer to Params) - // we can't use more than one param index in a group - unsigned interp_param; - - unsigned next_id; - - node_vec packed_ops; - - void assign_slot(unsigned slot, alu_node *n); - -public: - alu_group_tracker(shader &sh); - - // FIXME use fast bs correctness check (values for same chan <= 3) ?? - bool try_reserve(alu_node *n); - bool try_reserve(alu_packed_node *p); - - void reinit(); - void reset(bool keep_packed = false); - - sel_chan get_value_id(value *v); - void update_flags(alu_node *n); - - alu_node* slot(unsigned i) { return slots[i]; } - - unsigned used_slots() { - return (~available_slots) & ((1 << max_slots) - 1); - } - - unsigned inst_count() { - return __builtin_popcount(used_slots()); - } - - unsigned literal_count() { return lt.count(); } - unsigned literal_slot_count() { return (literal_count() + 1) >> 1; }; - unsigned slot_count() { return inst_count() + literal_slot_count(); } - - bool get_consumes_lds_oqa() { return consumes_lds_oqa; } - bool get_produces_lds_oqa() { return produces_lds_oqa; } - alu_group_node* emit(); - - rp_kcache_tracker& kcache() { return kc; } - - bool has_update_exec_mask() { return updates_exec_mask; } - unsigned avail_slots() { return available_slots; } - - void discard_all_slots(container_node &removed_nodes); - void discard_slots(unsigned slot_mask, container_node &removed_nodes); - - bool has_ar_load() { return has_mova; } -}; - -class alu_kcache_tracker { - bc_kcache kc[4]; - sb_set lines; - unsigned max_kcs; - -public: - - alu_kcache_tracker(sb_hw_class hc) - : kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 
4 : 2) {} - - void reset(); - bool try_reserve(alu_group_tracker >); - bool update_kc(); - void init_clause(bc_cf &bc) { - memcpy(bc.kc, kc, sizeof(kc)); - } -}; - -class alu_clause_tracker { - shader &sh; - - alu_kcache_tracker kt; - unsigned slot_count; - - alu_group_tracker grp0; - alu_group_tracker grp1; - - unsigned group; - - cf_node *clause; - - bool push_exec_mask; - - unsigned outstanding_lds_oqa_reads; -public: - container_node conflict_nodes; - - // current values of AR and PR registers that we have to preload - // till the end of clause (in fact, beginning, because we're scheduling - // bottom-up) - value *current_ar; - value *current_pr; - // current values of CF_IDX registers that need preloading - value *current_idx[2]; - - alu_clause_tracker(shader &sh); - - void reset(); - - // current group - alu_group_tracker& grp() { return group ? grp1 : grp0; } - // previous group - alu_group_tracker& prev_grp() { return group ? grp0 : grp1; } - - void emit_group(); - void emit_clause(container_node *c); - bool check_clause_limits(); - void new_group(); - bool is_empty(); - - alu_node* create_ar_load(value *v, chan_select ar_channel); - - void discard_current_group(); - - unsigned total_slots() { return slot_count; } -}; - -class post_scheduler : public pass { - - container_node ready, ready_copies; // alu only - container_node pending, bb_pending; - bb_node *cur_bb; - val_set live; // values live at the end of the alu clause - uc_map ucm; - alu_clause_tracker alu; - - typedef std::map rv_map; - rv_map regmap, prev_regmap; - - val_set cleared_interf; - - void emit_index_registers(); -public: - - post_scheduler(shader &sh) : pass(sh), - ready(), ready_copies(), pending(), cur_bb(), - live(), ucm(), alu(sh), regmap(), cleared_interf() {} - - virtual int run(); - bool run_on(container_node *n); - bool schedule_bb(bb_node *bb); - - void load_index_register(value *v, unsigned idx); - void process_fetch(container_node *c); - - bool process_alu(container_node *c); - 
bool schedule_alu(container_node *c); - bool prepare_alu_group(); - - void release_op(node *n); - - void release_src_values(node *n); - void release_src_vec(vvec &vv, bool src); - void release_src_val(value *v); - - void init_uc_val(container_node *c, value *v); - void init_uc_vec(container_node *c, vvec &vv, bool src); - unsigned init_ucm(container_node *c, node *n); - - void init_regmap(); - - bool check_interferences(); - - unsigned try_add_instruction(node *n); - - bool check_copy(node *n); - void dump_group(alu_group_tracker &rt); - - bool unmap_dst(alu_node *n); - bool unmap_dst_val(value *d); - - bool map_src(alu_node *n); - bool map_src_vec(vvec &vv, bool src); - bool map_src_val(value *v); - - bool recolor_local(value *v); - - void update_local_interferences(); - void update_live_src_vec(vvec &vv, val_set *born, bool src); - void update_live_dst_vec(vvec &vv); - void update_live(node *n, val_set *born); - void process_group(); - - void set_color_local_val(value *v, sel_chan color); - void set_color_local(value *v, sel_chan color); - - void add_interferences(value *v, sb_bitset &rb, val_set &vs); - - void init_globals(val_set &s, bool prealloc); - - void recolor_locals(); - - void dump_regmap(); - - void emit_load_ar(); - void emit_clause(); - - void process_ready_copies(); -}; - -} // namespace r600_sb - -#endif /* SB_SCHED_H_ */ diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp deleted file mode 100644 index a01972d..0000000 --- a/src/gallium/drivers/r600/sb/sb_shader.cpp +++ /dev/null @@ -1,687 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit 
persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include "sb_bc.h" -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -shader::shader(sb_context &sctx, shader_target t, unsigned id) -: ctx(sctx), next_temp_value_index(temp_regid_offset), - prep_regs_count(), pred_sels(), - regions(), inputs(), undef(), val_pool(sizeof(value)), - pool(), all_nodes(), src_stats(), opt_stats(), errors(), - optimized(), id(id), - coal(*this), bbs(), - target(t), ex(*this), vt(ex), root(), - compute_interferences(), - has_alu_predication(), - uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {} - -bool shader::assign_slot(alu_node* n, alu_node *slots[5]) { - - unsigned slot_flags = ctx.alu_slots(n->bc.op); - unsigned slot = n->bc.dst_chan; - - if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) && - (slot_flags & AF_S)) - slot = SLOT_TRANS; - - if (slots[slot]) - return false; - - n->bc.slot = slot; - slots[slot] = n; - return true; -} - -void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, - bool src) { - unsigned chan = 0; - while (comp_mask) { - if (comp_mask & 1) { - value *v = get_gpr_value(src, gpr, chan, false); - v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN); - if 
(!v->is_rel()) { - v->gpr = v->pin_gpr = v->select; - v->fix(); - } - if (v->array && !v->array->gpr) { - // if pinned value can be accessed with indirect addressing - // pin the entire array to its original location - v->array->gpr = v->array->base_gpr; - } - vec.push_back(v); - } - comp_mask >>= 1; - ++chan; - } -} - -cf_node* shader::create_clause(node_subtype nst) { - cf_node *n = create_cf(); - - n->subtype = nst; - - switch (nst) { - case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break; - case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break; - case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break; - case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break; - default: assert(!"invalid clause type"); break; - } - - n->bc.barrier = 1; - return n; -} - -void shader::create_bbs() { - create_bbs(root, bbs); -} - -void shader::expand_bbs() { - expand_bbs(bbs); -} - -alu_node* shader::create_mov(value* dst, value* src) { - alu_node *n = create_alu(); - n->bc.set_op(ALU_OP1_MOV); - n->dst.push_back(dst); - n->src.push_back(src); - dst->def = n; - - return n; -} - -alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) { - alu_node *n = create_mov(dst, src); - - dst->assign_source(src); - n->flags |= NF_COPY_MOV | NF_DONT_HOIST; - - if (affcost && dst->is_sgpr() && src->is_sgpr()) - coal.add_edge(src, dst, affcost); - - return n; -} - -value* shader::get_value(value_kind kind, sel_chan id, - unsigned version) { - if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count) - return val_pool[id - 1]; - - - unsigned key = (kind << 28) | (version << 16) | id; - value_map::iterator i = reg_values.find(key); - if (i != reg_values.end()) { - return i->second; - } - value *v = create_value(kind, id, version); - reg_values.insert(std::make_pair(key, v)); - return v; -} - -value* shader::get_special_value(unsigned sv_id, unsigned version) { - sel_chan id(sv_id, 0); - return get_value(VLK_SPECIAL_REG, id, version); -} - -void shader::fill_array_values(gpr_array 
*a, vvec &vv) { - unsigned sz = a->array_size; - vv.resize(sz); - for (unsigned i = 0; i < a->array_size; ++i) { - vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(), - false); - } -} - -value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel, - unsigned version) { - sel_chan id(reg, chan); - value *v; - gpr_array *a = get_gpr_array(reg, chan); - if (rel) { - assert(a); - v = create_value(VLK_REL_REG, id, 0); - v->rel = get_special_value(SV_AR_INDEX); - fill_array_values(a, v->muse); - if (!src) - fill_array_values(a, v->mdef); - } else { - if (version == 0 && reg < prep_regs_count) - return (val_pool[id - 1]); - - v = get_value(VLK_REG, id, version); - } - - v->array = a; - v->pin_gpr = v->select; - - return v; -} - -value* shader::create_temp_value() { - sel_chan id(++next_temp_value_index, 0); - return get_value(VLK_TEMP, id, 0); -} - -value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) { - return get_ro_value(kcache_values, VLK_KCACHE, - sel_chan(bank, index, chan, index_mode)); -} - -void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) { - if (inputs.size() <= gpr) - inputs.resize(gpr+1); - - shader_input &i = inputs[gpr]; - i.preloaded = preloaded; - i.comp_mask = comp_mask; - - if (preloaded) { - add_pinned_gpr_values(root->dst, gpr, comp_mask, true); - } - -} - -void shader::init() { - assert(!root); - root = create_container(); -} - -void shader::init_call_fs(cf_node* cf) { - unsigned gpr = 0; - - assert(target == TARGET_LS || target == TARGET_VS || target == TARGET_ES); - - for(inputs_vec::const_iterator I = inputs.begin(), - E = inputs.end(); I != E; ++I, ++gpr) { - if (!I->preloaded) - add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false); - else - add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true); - } -} - -void shader::set_undef(val_set& s) { - value *undefined = get_undef_value(); - if (!undefined->gvn_source) - 
vt.add_value(undefined); - - val_set &vs = s; - - for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) { - value *v = *I; - - assert(!v->is_readonly() && !v->is_rel()); - - v->gvn_source = undefined->gvn_source; - } -} - -value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) { - value *v = val_pool.create(k, regid, ver); - return v; -} - -value* shader::get_undef_value() { - if (!undef) - undef = create_value(VLK_UNDEF, 0, 0); - return undef; -} - -node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) { - node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags); - all_nodes.push_back(n); - return n; -} - -alu_node* shader::create_alu() { - alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node(); - all_nodes.push_back(n); - return n; -} - -alu_group_node* shader::create_alu_group() { - alu_group_node* n = - new (pool.allocate(sizeof(alu_group_node))) alu_group_node(); - all_nodes.push_back(n); - return n; -} - -alu_packed_node* shader::create_alu_packed() { - alu_packed_node* n = - new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node(); - all_nodes.push_back(n); - return n; -} - -cf_node* shader::create_cf() { - cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node(); - n->bc.barrier = 1; - all_nodes.push_back(n); - return n; -} - -fetch_node* shader::create_fetch() { - fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node(); - all_nodes.push_back(n); - return n; -} - -region_node* shader::create_region() { - region_node *n = new (pool.allocate(sizeof(region_node))) - region_node(regions.size()); - regions.push_back(n); - all_nodes.push_back(n); - return n; -} - -depart_node* shader::create_depart(region_node* target) { - depart_node* n = new (pool.allocate(sizeof(depart_node))) - depart_node(target, target->departs.size()); - target->departs.push_back(n); - all_nodes.push_back(n); - return n; -} - -repeat_node* shader::create_repeat(region_node* target) { - 
repeat_node* n = new (pool.allocate(sizeof(repeat_node))) - repeat_node(target, target->repeats.size() + 1); - target->repeats.push_back(n); - all_nodes.push_back(n); - return n; -} - -container_node* shader::create_container(node_type nt, node_subtype nst, - node_flags flags) { - container_node *n = new (pool.allocate(sizeof(container_node))) - container_node(nt, nst, flags); - all_nodes.push_back(n); - return n; -} - -if_node* shader::create_if() { - if_node* n = new (pool.allocate(sizeof(if_node))) if_node(); - all_nodes.push_back(n); - return n; -} - -bb_node* shader::create_bb(unsigned id, unsigned loop_level) { - bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level); - all_nodes.push_back(n); - return n; -} - -value* shader::get_special_ro_value(unsigned sel) { - return get_ro_value(special_ro_values, VLK_PARAM, sel); -} - -value* shader::get_const_value(const literal &v) { - value *val = get_ro_value(const_values, VLK_CONST, v); - val->literal_value = v; - return val; -} - -shader::~shader() { - for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end(); - I != E; ++I) - (*I)->~node(); - - for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end(); - I != E; ++I) { - delete *I; - } -} - -void shader::dump_ir() { - if (ctx.dump_pass) - dump(*this).run(); -} - -value* shader::get_value_version(value* v, unsigned ver) { - assert(!v->is_readonly() && !v->is_rel()); - value *vv = get_value(v->kind, v->select, ver); - assert(vv); - - if (v->array) { - vv->array = v->array; - } - - return vv; -} - -gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) { - - for (regarray_vec::iterator I = gpr_arrays.begin(), - E = gpr_arrays.end(); I != E; ++I) { - gpr_array* a = *I; - unsigned achan = a->base_gpr.chan(); - unsigned areg = a->base_gpr.sel(); - if (achan == chan && (reg >= areg && reg < areg+a->array_size)) - return a; - } - return NULL; -} - -void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count, - 
unsigned comp_mask) { - unsigned chan = 0; - while (comp_mask) { - if (comp_mask & 1) { - gpr_array *a = new gpr_array( - sel_chan(gpr_start, chan), gpr_count); - - SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr - << " [" << a->array_size << "]\n"; - ); - - gpr_arrays.push_back(a); - } - comp_mask >>= 1; - ++chan; - } -} - -value* shader::get_pred_sel(int sel) { - assert(sel == 0 || sel == 1); - if (!pred_sels[sel]) - pred_sels[sel] = get_const_value(sel); - - return pred_sels[sel]; -} - -cf_node* shader::create_cf(unsigned op) { - cf_node *c = create_cf(); - c->bc.set_op(op); - c->bc.barrier = 1; - return c; -} - -std::string shader::get_full_target_name() { - std::string s = get_shader_target_name(); - s += "/"; - s += ctx.get_hw_chip_name(); - s += "/"; - s += ctx.get_hw_class_name(); - return s; -} - -const char* shader::get_shader_target_name() { - switch (target) { - case TARGET_VS: return "VS"; - case TARGET_ES: return "ES"; - case TARGET_PS: return "PS"; - case TARGET_GS: return "GS"; - case TARGET_HS: return "HS"; - case TARGET_LS: return "LS"; - case TARGET_COMPUTE: return "COMPUTE"; - case TARGET_FETCH: return "FETCH"; - default: - return "INVALID_TARGET"; - } -} - -void shader::simplify_dep_rep(node* dr) { - container_node *p = dr->parent; - if (p->is_repeat()) { - repeat_node *r = static_cast(p); - r->target->expand_repeat(r); - } else if (p->is_depart()) { - depart_node *d = static_cast(p); - d->target->expand_depart(d); - } - if (dr->next) - dr->parent->cut(dr->next, NULL); -} - - -// FIXME this is used in some places as the max non-temp gpr, -// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead. 
-unsigned shader::first_temp_gpr() { - return MAX_GPR - ctx.alu_temp_gprs; -} - -unsigned shader::num_nontemp_gpr() { - return MAX_GPR - 2 * ctx.alu_temp_gprs; -} - -void shader::set_uses_kill() { - if (root->src.empty()) - root->src.resize(1); - - if (!root->src[0]) - root->src[0] = get_special_value(SV_VALID_MASK); -} - -alu_node* shader::clone(alu_node* n) { - alu_node *c = create_alu(); - - // FIXME: this may be wrong with indirect operands - c->src = n->src; - c->dst = n->dst; - - c->bc = n->bc; - c->pred = n->pred; - - return c; -} - -void shader::collect_stats(bool opt) { - if (!sb_context::dump_stat) - return; - - shader_stats &s = opt ? opt_stats : src_stats; - - s.shaders = 1; - s.ngpr = ngpr; - s.nstack = nstack; - s.collect(root); - - if (opt) - ctx.opt_stats.accumulate(s); - else - ctx.src_stats.accumulate(s); -} - -value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) { - value_map::iterator I = vm.find(key); - if (I != vm.end()) - return I->second; - value *v = create_value(vk, key, 0); - v->flags = VLF_READONLY; - vm.insert(std::make_pair(key, v)); - return v; -} - -void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) { - - bool inside_bb = false; - bool last_inside_bb = true; - node_iterator bb_start(n->begin()), I(bb_start), E(n->end()); - - for (; I != E; ++I) { - node *k = *I; - inside_bb = k->type == NT_OP; - - if (inside_bb && !last_inside_bb) - bb_start = I; - else if (!inside_bb) { - if (last_inside_bb - && I->type != NT_REPEAT - && I->type != NT_DEPART - && I->type != NT_IF) { - bb_node *bb = create_bb(bbs.size(), loop_level); - bbs.push_back(bb); - n->insert_node_before(*bb_start, bb); - if (bb_start != I) - bb->move(bb_start, I); - } - - if (k->is_container()) { - - bool loop = false; - if (k->type == NT_REGION) { - loop = static_cast(k)->is_loop(); - } - - create_bbs(static_cast(k), bbs, - loop_level + loop); - } - } - - if (k->type == NT_DEPART) - return; - - last_inside_bb = inside_bb; - } - - if 
(last_inside_bb) { - bb_node *bb = create_bb(bbs.size(), loop_level); - bbs.push_back(bb); - if (n->empty()) - n->push_back(bb); - else { - n->insert_node_before(*bb_start, bb); - if (bb_start != n->end()) - bb->move(bb_start, n->end()); - } - } else { - if (n->last && n->last->type == NT_IF) { - bb_node *bb = create_bb(bbs.size(), loop_level); - bbs.push_back(bb); - n->push_back(bb); - } - } -} - -void shader::expand_bbs(bbs_vec &bbs) { - - for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) { - bb_node *b = *I; - b->expand(); - } -} - -sched_queue_id shader::get_queue_id(node* n) { - switch (n->subtype) { - case NST_ALU_INST: - case NST_ALU_PACKED_INST: - case NST_COPY: - case NST_PSI: - return SQ_ALU; - case NST_FETCH_INST: { - fetch_node *f = static_cast(n); - if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX)) - return SQ_VTX; - if (f->bc.op_ptr->flags & FF_GDS) - return SQ_GDS; - return SQ_TEX; - } - case NST_CF_INST: - return SQ_CF; - default: - assert(0); - return SQ_NUM; - } -} - -void shader_stats::collect(node *n) { - if (n->is_alu_inst()) - ++alu; - else if (n->is_fetch_inst()) - ++fetch; - else if (n->is_container()) { - container_node *c = static_cast(n); - - if (n->is_alu_group()) - ++alu_groups; - else if (n->is_alu_clause()) - ++alu_clauses; - else if (n->is_fetch_clause()) - ++fetch_clauses; - else if (n->is_cf_inst()) - ++cf; - - if (!c->empty()) { - for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { - collect(*I); - } - } - } -} - -void shader_stats::accumulate(shader_stats& s) { - ++shaders; - ndw += s.ndw; - ngpr += s.ngpr; - nstack += s.nstack; - - alu += s.alu; - alu_groups += s.alu_groups; - alu_clauses += s.alu_clauses; - fetch += s.fetch; - fetch_clauses += s.fetch_clauses; - cf += s.cf; -} - -void shader_stats::dump() { - sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack - << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses - << ", alu:" << alu << ", fetch:" << fetch - << ", 
fetch clauses:" << fetch_clauses - << ", cf:" << cf; - - if (shaders > 1) - sblog << ", shaders:" << shaders; - - sblog << "\n"; -} - -static void print_diff(unsigned d1, unsigned d2) { - if (d1) - sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%"; - else if (d2) - sblog << "N/A"; - else - sblog << "0%"; -} - -void shader_stats::dump_diff(shader_stats& s) { - sblog << "dw:"; print_diff(ndw, s.ndw); - sblog << ", gpr:" ; print_diff(ngpr, s.ngpr); - sblog << ", stk:" ; print_diff(nstack, s.nstack); - sblog << ", alu groups:" ; print_diff(alu_groups, s.alu_groups); - sblog << ", alu clauses: " ; print_diff(alu_clauses, s.alu_clauses); - sblog << ", alu:" ; print_diff(alu, s.alu); - sblog << ", fetch:" ; print_diff(fetch, s.fetch); - sblog << ", fetch clauses:" ; print_diff(fetch_clauses, s.fetch_clauses); - sblog << ", cf:" ; print_diff(cf, s.cf); - sblog << "\n"; -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h deleted file mode 100644 index ccc70fa..0000000 --- a/src/gallium/drivers/r600/sb/sb_shader.h +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#ifndef SB_SHADER_H_ -#define SB_SHADER_H_ - -#include -#include -#include - -#include "sb_ir.h" -#include "sb_expr.h" - -namespace r600_sb { - -struct shader_input { - unsigned comp_mask; - unsigned preloaded; -}; - -struct error_info { - node *n; - unsigned arg_index; - std::string message; -}; - -typedef std::multimap error_map; - -class sb_context; - -typedef std::vector inputs_vec; -typedef std::vector gpr_array_vec; - -struct ra_edge { - value *a, *b; - unsigned cost; - - ra_edge(value *a, value *b, unsigned cost) : a(a), b(b), cost(cost) {} -}; - -enum chunk_flags { - RCF_GLOBAL = (1 << 0), - RCF_PIN_CHAN = (1 << 1), - RCF_PIN_REG = (1 << 2), - - RCF_FIXED = (1 << 3), - - RCF_PREALLOC = (1 << 4) -}; - -enum dce_flags { - DF_REMOVE_DEAD = (1 << 0), - DF_REMOVE_UNUSED = (1 << 1), - DF_EXPAND = (1 << 2), -}; - -inline dce_flags operator |(dce_flags l, dce_flags r) { - return (dce_flags)((unsigned)l|(unsigned)r); -} - -inline chunk_flags operator |(chunk_flags l, chunk_flags r) { - return (chunk_flags)((unsigned)l|(unsigned)r); -} -inline chunk_flags& operator |=(chunk_flags &l, chunk_flags r) { - l = l | r; - return l; -} - -inline chunk_flags& operator &=(chunk_flags &l, chunk_flags r) { - l = (chunk_flags)((unsigned)l & (unsigned)r); - return l; -} - -inline chunk_flags operator ~(chunk_flags r) { - return (chunk_flags)~(unsigned)r; -} - -struct ra_chunk { - vvec values; - chunk_flags flags; - unsigned cost; - sel_chan pin; - - 
ra_chunk() : values(), flags(), cost(), pin() {} - - bool is_fixed() { return flags & RCF_FIXED; } - void fix() { flags |= RCF_FIXED; } - - bool is_global() { return flags & RCF_GLOBAL; } - void set_global() { flags |= RCF_GLOBAL; } - - bool is_reg_pinned() { return flags & RCF_PIN_REG; } - bool is_chan_pinned() { return flags & RCF_PIN_CHAN; } - - bool is_prealloc() { return flags & RCF_PREALLOC; } - void set_prealloc() { flags |= RCF_PREALLOC; } -}; - -typedef std::vector chunk_vector; - -class ra_constraint { -public: - ra_constraint(constraint_kind kind) : kind(kind), cost(0) {} - - constraint_kind kind; - vvec values; - unsigned cost; - - void update_values(); - bool check(); -}; - -typedef std::vector constraint_vec; -typedef std::vector chunk_vec; - -// priority queue -// FIXME use something more suitale or custom class ? - -template -struct cost_compare { - bool operator ()(const T& t1, const T& t2) { - return t1->cost > t2->cost; - } -}; - -template -class queue { - typedef std::vector container; - container cont; - -public: - queue() : cont() {} - - typedef typename container::iterator iterator; - - iterator begin() { return cont.begin(); } - iterator end() { return cont.end(); } - - iterator insert(const T& t) { - iterator I = std::upper_bound(begin(), end(), t, Comp()); - if (I == end()) - cont.push_back(t); - else - cont.insert(I, t); - - return I; - } - - void erase(const T& t) { - std::pair R = - std::equal_range(begin(), end(), t, Comp()); - iterator F = std::find(R.first, R.second, t); - if (F != R.second) - cont.erase(F); - } -}; - -typedef queue > chunk_queue; -typedef queue > edge_queue; -typedef queue > constraint_queue; - -typedef std::set chunk_set; - -class shader; - -class coalescer { - - shader &sh; - - edge_queue edges; - chunk_queue chunks; - constraint_queue constraints; - - constraint_vec all_constraints; - chunk_vec all_chunks; - -public: - - coalescer(shader &sh) : sh(sh), edges(), chunks(), constraints() {} - ~coalescer(); - - int 
run(); - - void add_edge(value *a, value *b, unsigned cost); - void build_chunks(); - void build_constraint_queue(); - void build_chunk_queue(); - int color_constraints(); - int color_chunks(); - - ra_constraint* create_constraint(constraint_kind kind); - - enum ac_cost { - phi_cost = 10000, - copy_cost = 1, - }; - - void dump_edges(); - void dump_chunks(); - void dump_constraint_queue(); - - static void dump_chunk(ra_chunk *c); - static void dump_constraint(ra_constraint* c); - - void get_chunk_interferences(ra_chunk *c, val_set &s); - -private: - - void create_chunk(value *v); - void unify_chunks(ra_edge *e); - bool chunks_interference(ra_chunk *c1, ra_chunk *c2); - - int color_reg_constraint(ra_constraint *c); - void color_phi_constraint(ra_constraint *c); - - - void init_reg_bitset(sb_bitset &bs, val_set &vs); - - void color_chunk(ra_chunk *c, sel_chan color); - - ra_chunk* detach_value(value *v); -}; - - - -class shader { - - sb_context &ctx; - - typedef sb_map value_map; - value_map reg_values; - - // read-only values - value_map const_values; // immediate constants key -const value (uint32_t) - value_map special_ro_values; // key - hw alu_sel & chan - value_map kcache_values; - - gpr_array_vec gpr_arrays; - - unsigned next_temp_value_index; - - unsigned prep_regs_count; - - value* pred_sels[2]; - - regions_vec regions; - inputs_vec inputs; - - value *undef; - - sb_value_pool val_pool; - sb_pool pool; - - std::vector all_nodes; - -public: - shader_stats src_stats, opt_stats; - - error_map errors; - - bool optimized; - - unsigned id; - - coalescer coal; - - static const unsigned temp_regid_offset = 512; - - bbs_vec bbs; - - const shader_target target; - - expr_handler ex; - - value_table vt; - - container_node *root; - - bool compute_interferences; - - bool has_alu_predication; - bool uses_gradients; - - bool safe_math; - - unsigned ngpr, nstack; - - unsigned dce_flags; - - shader(sb_context &sctx, shader_target t, unsigned id); - - ~shader(); - - sb_context 
&get_ctx() const { return ctx; } - - value* get_const_value(const literal & v); - value* get_special_value(unsigned sv_id, unsigned version = 0); - value* create_temp_value(); - value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel, - unsigned version = 0); - - - value* get_special_ro_value(unsigned sel); - value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode); - - value* get_value_version(value* v, unsigned ver); - - void init(); - void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src); - - void dump_ir(); - - void add_gpr_array(unsigned gpr_start, unsigned gpr_count, - unsigned comp_mask); - - value* get_pred_sel(int sel); - bool assign_slot(alu_node *n, alu_node *slots[5]); - - gpr_array* get_gpr_array(unsigned reg, unsigned chan); - - void add_input(unsigned gpr, bool preloaded = false, - unsigned comp_mask = 0xF); - - const inputs_vec & get_inputs() {return inputs; } - - regions_vec & get_regions() { return regions; } - - void init_call_fs(cf_node *cf); - - value *get_undef_value(); - void set_undef(val_set &s); - - node* create_node(node_type nt, node_subtype nst, - node_flags flags = NF_EMPTY); - alu_node* create_alu(); - alu_group_node* create_alu_group(); - alu_packed_node* create_alu_packed(); - cf_node* create_cf(); - cf_node* create_cf(unsigned op); - fetch_node* create_fetch(); - region_node* create_region(); - depart_node* create_depart(region_node *target); - repeat_node* create_repeat(region_node *target); - container_node* create_container(node_type nt = NT_LIST, - node_subtype nst = NST_LIST, - node_flags flags = NF_EMPTY); - if_node* create_if(); - bb_node* create_bb(unsigned id, unsigned loop_level); - - value* get_value_by_uid(unsigned id) { return val_pool[id - 1]; } - - cf_node* create_clause(node_subtype nst); - - void create_bbs(); - void expand_bbs(); - - alu_node* create_mov(value* dst, value* src); - alu_node* create_copy_mov(value *dst, value 
*src, unsigned affcost = 1); - - const char * get_shader_target_name(); - - std::string get_full_target_name(); - - void create_bbs(container_node* n, bbs_vec &bbs, int loop_level = 0); - void expand_bbs(bbs_vec &bbs); - - sched_queue_id get_queue_id(node* n); - - void simplify_dep_rep(node *dr); - - unsigned first_temp_gpr(); - unsigned num_nontemp_gpr(); - - gpr_array_vec& arrays() { return gpr_arrays; } - - void set_uses_kill(); - - void fill_array_values(gpr_array *a, vvec &vv); - - alu_node* clone(alu_node *n); - - sb_value_pool& get_value_pool() { return val_pool; } - - void collect_stats(bool opt); - -private: - value* create_value(value_kind k, sel_chan regid, unsigned ver); - value* get_value(value_kind kind, sel_chan id, - unsigned version = 0); - value* get_ro_value(value_map &vm, value_kind vk, unsigned key); -}; - -} - -#endif /* SHADER_H_ */ diff --git a/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp b/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp deleted file mode 100644 index 5cd41c2..0000000 --- a/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#include -#include - -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -container_node* ssa_prepare::create_phi_nodes(int count) { - container_node *p = sh.create_container(); - val_set &vars = cur_set(); - node *nn; - - for (val_set::iterator I = vars.begin(sh), E = vars.end(sh); I != E; ++I) { - nn = sh.create_node(NT_OP, NST_PHI); - nn->dst.assign(1, *I); - nn->src.assign(count, *I); - p->push_back(nn); - } - return p; -} - -void ssa_prepare::add_defs(node &n) { - val_set &s = cur_set(); - for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) { - value *v = *I; - if (!v) - continue; - - if (v->is_rel()) { - s.add_vec(v->mdef); - } else - s.add_val(v); - } -} - -bool ssa_prepare::visit(cf_node& n, bool enter) { - if (enter) { - push_stk(); - } else { - add_defs(n); - pop_stk(); - } - return true; -} - -bool ssa_prepare::visit(alu_node& n, bool enter) { - if (enter) { - } else { - add_defs(n); - } - return true; -} - -bool ssa_prepare::visit(fetch_node& n, bool enter) { - if (enter) { - } else { - add_defs(n); - } - return true; -} - -bool ssa_prepare::visit(region_node& n, bool enter) { - if (enter) { - - push_stk(); - } else { - cur_set().add_set(n.vars_defined); - if (n.dep_count() > 0) - n.phi = create_phi_nodes(n.dep_count()); - if (n.rep_count() > 1) { - n.loop_phi = create_phi_nodes(n.rep_count()); - n.loop_phi->subtype = NST_LOOP_PHI_CONTAINER; - } - n.vars_defined.clear(); - pop_stk(); - } 
- return true; -} - -bool ssa_prepare::visit(repeat_node& n, bool enter) { - if (enter) { - push_stk(); - } else { - assert(n.target); - n.target->vars_defined.add_set(cur_set()); - cur_set().clear(); - pop_stk(); - } - return true; -} - -bool ssa_prepare::visit(depart_node& n, bool enter) { - if (enter) { - push_stk(); - } else { - assert(n.target); - n.target->vars_defined.add_set(cur_set()); - cur_set().clear(); - pop_stk(); - } - return true; -} - -// =============================== - -int ssa_rename::init() { - rename_stack.push(def_map()); - rename_lds_oq_stack.push(def_map()); - rename_lds_rw_stack.push(def_map()); - return 0; -} - -bool ssa_rename::visit(alu_group_node& n, bool enter) { - // taking into account parallel execution of the alu group - if (enter) { - for (node_iterator I = n.begin(), E = n.end(); I != E; ++I) { - I->accept(*this, true); - } - } else { - for (node_iterator I = n.begin(), E = n.end(); I != E; ++I) { - I->accept(*this, false); - } - } - return false; -} - -bool ssa_rename::visit(cf_node& n, bool enter) { - if (enter) { - rename_src(&n); - } else { - rename_dst(&n); - } - return true; -} - -bool ssa_rename::visit(alu_node& n, bool enter) { - if (enter) { - rename_src(&n); - } else { - - node *psi = NULL; - - if (n.pred && n.dst[0]) { - - value *d = n.dst[0]; - unsigned index = get_index(rename_stack.top(), d); - value *p = sh.get_value_version(d, index); - - psi = sh.create_node(NT_OP, NST_PSI); - - container_node *parent; - if (n.parent->subtype == NST_ALU_GROUP) - parent = n.parent; - else { - assert (n.parent->parent->subtype == NST_ALU_GROUP); - parent = n.parent->parent; - } - parent->insert_after(psi); - - assert(n.bc.pred_sel); - - psi->src.resize(6); - psi->src[2] = p; - psi->src[3] = n.pred; - psi->src[4] = sh.get_pred_sel(n.bc.pred_sel - PRED_SEL_0); - psi->src[5] = d; - psi->dst.push_back(d); - } - - rename_dst(&n); - - if (psi) { - rename_src(psi); - rename_dst(psi); - } - - if (!n.dst.empty() && n.dst[0]) { - // FIXME 
probably use separate pass for such things - if ((n.bc.op_ptr->flags & AF_INTERP) || n.bc.op == ALU_OP2_CUBE) - n.dst[0]->flags |= VLF_PIN_CHAN; - } - } - return true; -} - -bool ssa_rename::visit(alu_packed_node& n, bool enter) { - if (enter) { - for (node_iterator I = n.begin(), E = n.end(); I != E; ++I) { - I->accept(*this, true); - } - } else { - for (node_iterator I = n.begin(), E = n.end(); I != E; ++I) { - I->accept(*this, false); - } - - bool repl = (n.op_ptr()->flags & AF_REPL) || - (ctx.is_cayman() && (n.first->alu_op_slot_flags() & AF_S)); - - n.init_args(repl); - } - return false; -} - -bool ssa_rename::visit(fetch_node& n, bool enter) { - if (enter) { - rename_src(&n); - rename_dst(&n); - } else { - } - return true; -} - -bool ssa_rename::visit(region_node& n, bool enter) { - if (enter) { - if (n.loop_phi) - rename_phi_args(n.loop_phi, 0, true); - } else { - if (n.phi) - rename_phi_args(n.phi, ~0u, true); - } - return true; -} - -bool ssa_rename::visit(repeat_node& n, bool enter) { - if (enter) { - push(n.target->loop_phi); - } else { - if (n.target->loop_phi) - rename_phi_args(n.target->loop_phi, n.rep_id, false); - pop(); - } - return true; -} - -bool ssa_rename::visit(depart_node& n, bool enter) { - if (enter) { - push(n.target->phi); - } else { - if (n.target->phi) - rename_phi_args(n.target->phi, n.dep_id, false); - pop(); - } - return true; -} - -bool ssa_rename::visit(if_node& n, bool enter) { - if (enter) { - } else { - n.cond = rename_use(&n, n.cond); - } - return true; -} - -void ssa_rename::push(node* phi) { - rename_stack.push(rename_stack.top()); -} - -void ssa_rename::pop() { - rename_stack.pop(); -} - -value* ssa_rename::rename_use(node *n, value* v) { - if (v->version) - return v; - unsigned index; - if (v->is_lds_access()) { - index = get_index(rename_lds_rw_stack.top(), v); - } else if (v->is_lds_oq()) { - index = new_index(lds_oq_count, v); - set_index(rename_lds_oq_stack.top(), v, index); - } else { - index = 
get_index(rename_stack.top(), v); - } - - v = sh.get_value_version(v, index); - - // if (alu) instruction is predicated and source arg comes from psi node - // (that is, from another predicated instruction through its psi node), - // we can try to select the corresponding source value directly - if (n->pred && v->def && v->def->subtype == NST_PSI) { - assert(n->subtype == NST_ALU_INST); - alu_node *an = static_cast(n); - node *pn = v->def; - // FIXME make it more generic ??? - if (pn->src.size() == 6) { - if (pn->src[3] == n->pred) { - value* ps = sh.get_pred_sel(an->bc.pred_sel - PRED_SEL_0); - if (pn->src[4] == ps) - return pn->src[5]; - else - return pn->src[2]; - } - } - } - return v; -} - -value* ssa_rename::rename_def(node *n, value* v) { - unsigned index; - - if (v->is_lds_access()) { - index = new_index(lds_rw_count, v); - set_index(rename_lds_rw_stack.top(), v, index); - } else { - index = new_index(def_count, v); - set_index(rename_stack.top(), v, index); - } - value *r = sh.get_value_version(v, index); - return r; -} - -void ssa_rename::rename_src_vec(node *n, vvec &vv, bool src) { - for(vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value* &v = *I; - if (!v || v->is_readonly()) - continue; - - if (v->is_rel()) { - if (!v->rel->is_readonly()) - v->rel = rename_use(n, v->rel); - rename_src_vec(n, v->muse, true); - } else if (src) - v = rename_use(n, v); - } -} - -void ssa_rename::rename_src(node* n) { - if (n->pred) - n->pred = rename_use(n, n->pred); - - rename_src_vec(n, n->src, true); - rename_src_vec(n, n->dst, false); - -} - -void ssa_rename::rename_dst_vec(node *n, vvec &vv, bool set_def) { - - for(vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value* &v = *I; - if (!v) - continue; - - if (v->is_rel()) { - rename_dst_vec(n, v->mdef, false); - } else { - v = rename_def(n, v); - if (set_def) - v->def = n; - } - } -} - -void ssa_rename::rename_dst(node* n) { - rename_dst_vec(n, n->dst, true); -} - -unsigned 
ssa_rename::get_index(def_map& m, value* v) { - def_map::iterator I = m.find(v); - if (I != m.end()) - return I->second; - return 0; -} - -void ssa_rename::set_index(def_map& m, value* v, unsigned index) { - std::pair r = m.insert(std::make_pair(v, index)); - if (!r.second) - r.first->second = index; -} - -unsigned ssa_rename::new_index(def_map& m, value* v) { - unsigned index = 1; - def_map::iterator I = m.find(v); - if (I != m.end()) - index = ++I->second; - else - m.insert(std::make_pair(v, index)); - return index; -} - -bool ssa_rename::visit(node& n, bool enter) { - if (enter) { - assert(n.subtype == NST_PSI); - rename_src(&n); - rename_dst(&n); - } - return false; -} - -bool ssa_rename::visit(container_node& n, bool enter) { - if (enter) { - } else { - // should be root container node - assert(n.parent == NULL); - rename_src_vec(&n, n.src, true); - } - return true; -} - -void ssa_rename::rename_phi_args(container_node* phi, unsigned op, bool def) { - for (node_iterator I = phi->begin(), E = phi->end(); I != E; ++I) { - node *o = *I; - if (op != ~0u) - o->src[op] = rename_use(o, o->src[op]); - if (def) { - o->dst[0] = rename_def(o, o->dst[0]); - o->dst[0]->def = o; - } - } -} - -} // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_valtable.cpp b/src/gallium/drivers/r600/sb/sb_valtable.cpp deleted file mode 100644 index f847138..0000000 --- a/src/gallium/drivers/r600/sb/sb_valtable.cpp +++ /dev/null @@ -1,599 +0,0 @@ -/* - * Copyright 2013 Vadim Girlin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and 
this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Vadim Girlin - */ - -#define VT_DEBUG 0 - -#if VT_DEBUG -#define VT_DUMP(q) do { q } while (0) -#else -#define VT_DUMP(q) -#endif - -#include - -#include "sb_shader.h" -#include "sb_pass.h" - -namespace r600_sb { - -static const char * chans = "xyzw01?_"; - -sb_ostream& operator << (sb_ostream &o, value &v) { - - bool dead = v.flags & VLF_DEAD; - - if (dead) - o << "{"; - - switch (v.kind) { - case VLK_SPECIAL_REG: { - switch (v.select.sel()) { - case SV_AR_INDEX: o << "AR"; break; - case SV_ALU_PRED: o << "PR"; break; - case SV_EXEC_MASK: o << "EM"; break; - case SV_VALID_MASK: o << "VM"; break; - case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break; - case SV_LDS_RW: o << "LDS_RW"; break; - case SV_LDS_OQA: o << "LDS_OQA"; break; - case SV_LDS_OQB: o << "LDS_OQB"; break; - case SV_SCRATCH: o << "SCRATCH"; break; - default: o << "???specialreg"; break; - } - break; - } - - case VLK_REG: - o << "R" << v.select.sel() << "." - << chans[v.select.chan()]; - - break; - case VLK_KCACHE: { - o << "C" << v.select.sel() << "." 
<< chans[v.select.chan()]; - } - break; - case VLK_CONST: - o << v.literal_value.f << "|"; - o.print_zw_hex(v.literal_value.u, 8); - break; - case VLK_PARAM: - o << "Param" << (v.select.sel() - ALU_SRC_PARAM_OFFSET) - << chans[v.select.chan()]; - break; - case VLK_TEMP: - o << "t" << v.select.sel() - shader::temp_regid_offset; - break; - case VLK_REL_REG: - - o << "A" << v.select; - o << "["; - o << *v.rel; - o << "]"; - - o << "_" << v.uid; - - break; - case VLK_UNDEF: - o << "undef"; - break; - default: - o << v.kind << "?????"; - break; - } - - if (v.version) - o << "." << v.version; - - if (dead) - o << "}"; - - if (v.is_global()) - o << "||"; - if (v.is_fixed()) - o << "F"; - if (v.is_prealloc()) - o << "P"; - - sel_chan g; - - if (v.is_rel()) { - g = v.array->gpr; - } else { - g = v.gpr; - } - - if (g) { - o << "@R" << g.sel() << "." << chans[g.chan()]; - } - - return o; -} - -void value_table::add_value(value* v) { - - if (v->gvn_source) { - return; - } - - VT_DUMP( - sblog << "gvn add_value "; - dump::dump_val(v); - ); - - value_hash hash = v->hash(); - vt_item & vti = hashtable[hash & size_mask]; - vti.push_back(v); - ++cnt; - - if (v->def && ex.try_fold(v)) { - VT_DUMP( - sblog << " folded: "; - dump::dump_val(v->gvn_source); - sblog << "\n"; - ); - return; - } - - int n = 0; - for (vt_item::iterator I = vti.begin(), E = vti.end(); I != E; ++I, ++n) { - value *c = *I; - - if (c == v) - break; - - if (expr_equal(c, v)) { - v->gvn_source = c->gvn_source; - - VT_DUMP( - sblog << " found : equal to "; - dump::dump_val(v->gvn_source); - sblog << "\n"; - ); - return; - } - } - - v->gvn_source = v; - VT_DUMP( - sblog << " added new\n"; - ); -} - -value_hash value::hash() { - if (ghash) - return ghash; - if (is_rel()) - ghash = rel_hash(); - else if (def) - ghash = def->hash(); - else - ghash = ((uintptr_t)this) | 1; - - return ghash; -} - -value_hash value::rel_hash() { - value_hash h = rel ? 
rel->hash() : 0; - h |= select << 10; - h |= array->hash(); - return h; -} - -bool value_table::expr_equal(value* l, value* r) { - return ex.equal(l, r); -} - -void value_table::get_values(vvec& v) { - v.resize(cnt); - - vvec::iterator T = v.begin(); - - for(vt_table::iterator I = hashtable.begin(), E = hashtable.end(); - I != E; ++I) { - T = std::copy(I->begin(), I->end(), T); - } -} - -void value::add_use(node* n) { - if (0) { - sblog << "add_use "; - dump::dump_val(this); - sblog << " => "; - dump::dump_op(n); - } - uses.push_back(n); -} - -struct use_node_comp { - explicit use_node_comp(const node *n) : n(n) {} - bool operator() (const node *o) { - return o->hash() == n->hash(); - } - - private: - const node *n; -}; - -void value::remove_use(const node *n) { - uselist::iterator it = - std::find_if(uses.begin(), uses.end(), use_node_comp(n)); - - if (it != uses.end()) - { - // We only ever had a pointer, so don't delete it here - uses.erase(it); - } -} - -unsigned value::use_count() { - return uses.size(); -} - -bool value::is_global() { - if (chunk) - return chunk->is_global(); - return flags & VLF_GLOBAL; -} - -void value::set_global() { - assert(is_sgpr()); - flags |= VLF_GLOBAL; - if (chunk) - chunk->set_global(); -} - -void value::set_prealloc() { - assert(is_sgpr()); - flags |= VLF_PREALLOC; - if (chunk) - chunk->set_prealloc(); -} - -bool value::is_fixed() { - if (array && array->gpr) - return true; - if (chunk && chunk->is_fixed()) - return true; - return flags & VLF_FIXED; -} - -void value::fix() { - if (chunk) - chunk->fix(); - flags |= VLF_FIXED; -} - -bool value::is_prealloc() { - if (chunk) - return chunk->is_prealloc(); - return flags & VLF_PREALLOC; -} - -void value::delete_uses() { - // We only ever had pointers, so don't delete them here - uses.erase(uses.begin(), uses.end()); -} - -bool value::no_reladdr_conflict_with(value *src) -{ - /* if the register is not relative, it can't create an relative access conflict */ - if (!src->is_rel()) - 
return true; - - /* If the destination is AR then we accept the copy propagation, because the - * scheduler actually re-creates the address loading operation and will - * signal interference if there is an address register load and it will fail - * because of this. - */ - if (gvalue()->is_AR()) - return true; - - /* For all nodes that use this value test whether the operation uses another - * relative access with a different address value. If found, signal conflict. - */ - for (uselist::const_iterator N = uses.begin(); N != uses.end(); ++N) { - for (vvec::const_iterator V = (*N)->src.begin(); V != (*N)->src.end(); ++V) { - if (*V) { - value *v = (*V)->gvalue(); - if (v != src && v->is_rel() && v->rel != src->rel) - return false; - } - } - for (vvec::const_iterator V = (*N)->dst.begin(); V != (*N)->dst.end(); ++V) { - if (*V) { - value *v = (*V)->gvalue(); - if (v && v != src && v->is_rel() && (v->rel != src->rel)) - return false; - } - } - } - return true; -} - -void ra_constraint::update_values() { - for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) { - assert(!(*I)->constraint); - (*I)->constraint = this; - } -} - -void* sb_pool::allocate(unsigned sz) { - sz = (sz + SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1); - assert (sz < (block_size >> 6) && "too big allocation size for sb_pool"); - - unsigned offset = total_size % block_size; - unsigned capacity = block_size * blocks.size(); - - if (total_size + sz > capacity) { - total_size = capacity; - void * nb = malloc(block_size); - blocks.push_back(nb); - offset = 0; - } - - total_size += sz; - return ((char*)blocks.back() + offset); -} - -void sb_pool::free_all() { - for (block_vector::iterator I = blocks.begin(), E = blocks.end(); I != E; - ++I) { - free(*I); - } -} - -value* sb_value_pool::create(value_kind k, sel_chan regid, - unsigned ver) { - void* np = allocate(aligned_elt_size); - value *v = new (np) value(size(), k, regid, ver); - return v; -} - -void sb_value_pool::delete_all() { - 
unsigned bcnt = blocks.size(); - unsigned toffset = 0; - for (unsigned b = 0; b < bcnt; ++b) { - char *bstart = (char*)blocks[b]; - for (unsigned offset = 0; offset < block_size; - offset += aligned_elt_size) { - ((value*)(bstart + offset))->~value(); - toffset += aligned_elt_size; - if (toffset >= total_size) - return; - } - } -} - -bool sb_bitset::get(unsigned id) { - assert(id < bit_size); - unsigned w = id / bt_bits; - unsigned b = id % bt_bits; - return (data[w] >> b) & 1; -} - -void sb_bitset::set(unsigned id, bool bit) { - assert(id < bit_size); - unsigned w = id / bt_bits; - unsigned b = id % bt_bits; - if (w >= data.size()) - data.resize(w + 1); - - if (bit) - data[w] |= (1 << b); - else - data[w] &= ~(1 << b); -} - -inline bool sb_bitset::set_chk(unsigned id, bool bit) { - assert(id < bit_size); - unsigned w = id / bt_bits; - unsigned b = id % bt_bits; - basetype d = data[w]; - basetype dn = (d & ~(1 << b)) | (bit << b); - bool r = (d != dn); - data[w] = r ? dn : data[w]; - return r; -} - -void sb_bitset::clear() { - std::fill(data.begin(), data.end(), 0); -} - -void sb_bitset::resize(unsigned size) { - unsigned cur_data_size = data.size(); - unsigned new_data_size = (size + bt_bits - 1) / bt_bits; - - - if (new_data_size != cur_data_size) - data.resize(new_data_size); - - // make sure that new bits in the existing word are cleared - if (cur_data_size && size > bit_size && bit_size % bt_bits) { - basetype clear_mask = (~(basetype)0u) << (bit_size % bt_bits); - data[cur_data_size - 1] &= ~clear_mask; - } - - bit_size = size; -} - -unsigned sb_bitset::find_bit(unsigned start) { - assert(start < bit_size); - unsigned w = start / bt_bits; - unsigned b = start % bt_bits; - unsigned sz = data.size(); - - while (w < sz) { - basetype d = data[w] >> b; - if (d != 0) { - unsigned pos = __builtin_ctz(d) + b + w * bt_bits; - return pos; - } - - b = 0; - ++w; - } - - return bit_size; -} - -sb_value_set::iterator::iterator(shader& sh, sb_value_set* s, unsigned nb) - : 
vp(sh.get_value_pool()), s(s), nb(nb) {} - -bool sb_value_set::add_set_checked(sb_value_set& s2) { - if (bs.size() < s2.bs.size()) - bs.resize(s2.bs.size()); - sb_bitset nbs = bs | s2.bs; - if (bs != nbs) { - bs.swap(nbs); - return true; - } - return false; -} - -void r600_sb::sb_value_set::remove_set(sb_value_set& s2) { - bs.mask(s2.bs); -} - -bool sb_value_set::add_val(value* v) { - assert(v); - if (bs.size() < v->uid) - bs.resize(v->uid + 32); - - return bs.set_chk(v->uid - 1, 1); -} - -bool sb_value_set::remove_vec(vvec& vv) { - bool modified = false; - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - if (*I) - modified |= remove_val(*I); - } - return modified; -} - -void sb_value_set::clear() { - bs.clear(); -} - -bool sb_value_set::remove_val(value* v) { - assert(v); - if (bs.size() < v->uid) - return false; - return bs.set_chk(v->uid - 1, 0); -} - -bool r600_sb::sb_value_set::add_vec(vvec& vv) { - bool modified = false; - for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { - value *v = *I; - if (v) - modified |= add_val(v); - } - return modified; -} - -bool r600_sb::sb_value_set::contains(value* v) { - unsigned b = v->uid - 1; - if (b < bs.size()) - return bs.get(b); - else - return false; -} - -bool sb_value_set::empty() { - return bs.size() == 0 || bs.find_bit(0) == bs.size(); -} - -void sb_bitset::swap(sb_bitset& bs2) { - std::swap(data, bs2.data); - std::swap(bit_size, bs2.bit_size); -} - -bool sb_bitset::operator ==(const sb_bitset& bs2) { - if (bit_size != bs2.bit_size) - return false; - - for (unsigned i = 0, c = data.size(); i < c; ++i) { - if (data[i] != bs2.data[i]) - return false; - } - return true; -} - -sb_bitset& sb_bitset::operator &=(const sb_bitset& bs2) { - if (bit_size > bs2.bit_size) { - resize(bs2.bit_size); - } - - for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c; - ++i) { - data[i] &= bs2.data[i]; - } - return *this; -} - -sb_bitset& sb_bitset::mask(const sb_bitset& bs2) { - if 
(bit_size < bs2.bit_size) { - resize(bs2.bit_size); - } - - for (unsigned i = 0, c = data.size(); i < c; - ++i) { - data[i] &= ~bs2.data[i]; - } - return *this; -} - -bool ra_constraint::check() { - assert(kind == CK_SAME_REG); - - unsigned reg = 0; - - for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) { - value *v = *I; - if (!v) - continue; - - if (!v->gpr) - return false; - - if (reg == 0) - reg = v->gpr.sel() + 1; - else if (reg != v->gpr.sel() + 1) - return false; - - if (v->is_chan_pinned()) { - if (v->pin_gpr.chan() != v->gpr.chan()) - return false; - } - } - return true; -} - -bool gpr_array::is_dead() { - return false; -} - -} // namespace r600_sb