freedreno/ir3: array rework
author Rob Clark <robclark@freedesktop.org>
Sun, 10 Jan 2016 19:10:08 +0000 (14:10 -0500)
committer Rob Clark <robclark@freedesktop.org>
Sat, 16 Jan 2016 19:21:08 +0000 (14:21 -0500)
Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/freedreno/ir3/ir3.c
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/gallium/drivers/freedreno/ir3/ir3_cp.c
src/gallium/drivers/freedreno/ir3/ir3_depth.c
src/gallium/drivers/freedreno/ir3/ir3_print.c
src/gallium/drivers/freedreno/ir3/ir3_ra.c
src/gallium/drivers/freedreno/ir3/ir3_sched.c

index a75b04b..6562924 100644 (file)
@@ -400,9 +400,16 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
                return 1;
        case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
        case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+               /* Technically this should be the same as for TEMP/CONST, since
+                * everything is just normal registers.  This is just a temporary
+                * hack until load_input/store_output handle arrays in a similar
+                * way to load_var/store_var..
+                */
+               return 0;
        case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-               return 1;
+               /* a2xx compiler doesn't handle indirect: */
+               return is_ir3(screen) ? 1 : 0;
        case PIPE_SHADER_CAP_SUBROUTINES:
        case PIPE_SHADER_CAP_DOUBLES:
        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
index b24825c..be415d8 100644 (file)
@@ -81,6 +81,7 @@ struct ir3 * ir3_create(struct ir3_compiler *compiler,
        shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
 
        list_inithead(&shader->block_list);
+       list_inithead(&shader->array_list);
 
        return shader;
 }
@@ -121,18 +122,19 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
                val.iim_val = reg->iim_val;
        } else {
                unsigned components;
+               int16_t max;
 
                if (reg->flags & IR3_REG_RELATIV) {
                        components = reg->size;
-                       val.dummy10 = reg->offset;
+                       val.dummy10 = reg->array.offset;
+                       max = (reg->array.offset + repeat + components - 1) >> 2;
                } else {
                        components = util_last_bit(reg->wrmask);
                        val.comp = reg->num & 0x3;
                        val.num  = reg->num >> 2;
+                       max = (reg->num + repeat + components - 1) >> 2;
                }
 
-               int16_t max = (reg->num + repeat + components - 1) >> 2;
-
                if (reg->flags & IR3_REG_CONST) {
                        info->max_const = MAX2(info->max_const, max);
                } else if (val.num == 63) {
@@ -233,7 +235,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
        iassert((instr->regs_count == 2) || (instr->regs_count == 3));
 
        if (src1->flags & IR3_REG_RELATIV) {
-               iassert(src1->num < (1 << 10));
+               iassert(src1->array.offset < (1 << 10));
                cat2->rel1.src1      = reg(src1, info, instr->repeat,
                                IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
                                IR3_REG_HALF | absneg);
@@ -260,7 +262,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
                                !((src1->flags ^ src2->flags) & IR3_REG_HALF));
 
                if (src2->flags & IR3_REG_RELATIV) {
-                       iassert(src2->num < (1 << 10));
+                       iassert(src2->array.offset < (1 << 10));
                        cat2->rel2.src2      = reg(src2, info, instr->repeat,
                                        IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
                                        IR3_REG_HALF | absneg);
@@ -333,7 +335,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
        iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
 
        if (src1->flags & IR3_REG_RELATIV) {
-               iassert(src1->num < (1 << 10));
+               iassert(src1->array.offset < (1 << 10));
                cat3->rel1.src1      = reg(src1, info, instr->repeat,
                                IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
                                IR3_REG_HALF | absneg);
@@ -361,7 +363,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
 
 
        if (src3->flags & IR3_REG_RELATIV) {
-               iassert(src3->num < (1 << 10));
+               iassert(src3->array.offset < (1 << 10));
                cat3->rel2.src3      = reg(src3, info, instr->repeat,
                                IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
                                IR3_REG_HALF | absneg);
@@ -404,7 +406,7 @@ static int emit_cat4(struct ir3_instruction *instr, void *ptr,
        iassert(instr->regs_count == 2);
 
        if (src->flags & IR3_REG_RELATIV) {
-               iassert(src->num < (1 << 10));
+               iassert(src->array.offset < (1 << 10));
                cat4->rel.src      = reg(src, info, instr->repeat,
                                IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
                                IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
@@ -737,6 +739,14 @@ struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
        return reg;
 }
 
+struct ir3_register * ir3_reg_clone(struct ir3 *shader,
+               struct ir3_register *reg)
+{
+       struct ir3_register *new_reg = reg_create(shader, 0, 0);
+       *new_reg = *reg;
+       return new_reg;
+}
+
 void
 ir3_instr_set_address(struct ir3_instruction *instr,
                struct ir3_instruction *addr)
@@ -777,3 +787,12 @@ ir3_count_instructions(struct ir3 *ir)
        }
        return cnt;
 }
+
+struct ir3_array *
+ir3_lookup_array(struct ir3 *ir, unsigned id)
+{
+       list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
+               if (arr->id == id)
+                       return arr;
+       return NULL;
+}
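Note: the new ir3_lookup_array() is a simple linear walk of the per-shader
array_list; a pass holding an IR3_REG_ARRAY register resolves the backing
array via the id stored in the register.  A minimal sketch of the intended
usage (the helper name here is made up for illustration, it is not part of
the patch):

	static struct ir3_array *
	reg_array(struct ir3 *ir, struct ir3_register *reg)
	{
		/* only IR3_REG_ARRAY registers carry an array id: */
		debug_assert(reg->flags & IR3_REG_ARRAY);
		return ir3_lookup_array(ir, reg->array.id);
	}

ir3_reg_clone() is added so that copy-propagation can rewrite a source
register without mutating a register still shared with the defining mov;
see the ir3_cp.c hunks below.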
index 62d14a0..1e5a1e9 100644 (file)
@@ -83,7 +83,8 @@ struct ir3_register {
                 * before register assignment is done:
                 */
                IR3_REG_SSA    = 0x2000,   /* 'instr' is ptr to assigning instr */
-               IR3_REG_PHI_SRC= 0x4000,   /* phi src, regs[0]->instr points to phi */
+               IR3_REG_ARRAY  = 0x4000,
+               IR3_REG_PHI_SRC= 0x8000,   /* phi src, regs[0]->instr points to phi */
 
        } flags;
        union {
@@ -97,11 +98,18 @@ struct ir3_register {
                uint32_t uim_val;
                float    fim_val;
                /* relative: */
-               int   offset;
+               struct {
+                       uint16_t id;
+                       uint16_t offset;
+               } array;
        };
 
-       /* for IR3_REG_SSA, src registers contain ptr back to
-        * assigning instruction.
+       /* For IR3_REG_SSA, src registers contain ptr back to assigning
+        * instruction.
+        *
+        * For IR3_REG_ARRAY, the pointer is back to the last dependent
+        * array access (although the net effect is the same, it points
+        * back to a previous instruction that we depend on).
         */
        struct ir3_instruction *instr;
 
@@ -222,9 +230,6 @@ struct ir3_instruction {
                        int off;              /* component/offset */
                } fo;
                struct {
-                       int aid;
-               } fi;
-               struct {
                        /* used to temporarily hold reference to nir_phi_instr
                         * until we resolve the phi srcs
                         */
@@ -293,19 +298,6 @@ struct ir3_instruction {
         */
        struct ir3_instruction *address;
 
-       /* in case of a instruction with relative dst instruction, we need to
-        * capture the dependency on the fanin for the previous values of
-        * the array elements.  Since we don't know at compile time actually
-        * which array elements are written, this serves to preserve the
-        * unconditional write to array elements prior to the conditional
-        * write.
-        *
-        * TODO only cat1 can do indirect write.. we could maybe move this
-        * into instr->cat1.fanin (but would require the frontend to insert
-        * the extra mov)
-        */
-       struct ir3_instruction *fanin;
-
        /* Entry in ir3_block's instruction list: */
        struct list_head node;
 
@@ -379,10 +371,39 @@ struct ir3 {
        /* List of blocks: */
        struct list_head block_list;
 
+       /* List of ir3_array's: */
+       struct list_head array_list;
+
        unsigned heap_idx;
        struct ir3_heap_chunk *chunk;
 };
 
+typedef struct nir_variable nir_variable;
+
+struct ir3_array {
+       struct list_head node;
+       unsigned length;
+       unsigned id;
+
+       nir_variable *var;
+
+       /* We track the last write and last access (read or write) to
+        * set up dependencies on instructions that read or write the
+        * array.  Reads can be re-ordered wrt. other reads, but should
+        * not be re-ordered wrt. writes.  Writes cannot be reordered
+        * wrt. any other access to the array.
+        *
+        * So array reads depend on last write, and array writes depend
+        * on the last access.
+        */
+       struct ir3_instruction *last_write, *last_access;
+
+       /* extra stuff used in RA pass: */
+       unsigned base;
+};
+
+struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id);
+
 typedef struct nir_block nir_block;
 
 struct ir3_block {
@@ -430,6 +451,8 @@ const char *ir3_instr_name(struct ir3_instruction *instr);
 
 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
                int num, int flags);
+struct ir3_register * ir3_reg_clone(struct ir3 *shader,
+               struct ir3_register *reg);
 
 void ir3_instr_set_address(struct ir3_instruction *instr,
                struct ir3_instruction *addr);
@@ -510,6 +533,9 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
        if (dst->num == regid(REG_A0, 0))
                return false;
 
+       if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
+               return false;
+
        if ((instr->category == 1) &&
                        (instr->cat1.src_type == instr->cat1.dst_type))
                return true;
@@ -623,8 +649,10 @@ static inline bool writes_pred(struct ir3_instruction *instr)
 /* TODO better name */
 static inline struct ir3_instruction *ssa(struct ir3_register *reg)
 {
-       if (reg->flags & IR3_REG_SSA)
+       if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) {
+               debug_assert(!(reg->instr && (reg->instr->flags & IR3_INSTR_UNUSED)));
                return reg->instr;
+       }
        return NULL;
 }
 
@@ -813,8 +841,6 @@ static inline unsigned ir3_cat3_absneg(opc_t opc)
 
 static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
 {
-       if (instr->fanin)
-               return instr->regs_count + 2;
        if (instr->address)
                return instr->regs_count + 1;
        return instr->regs_count;
@@ -822,8 +848,6 @@ static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
 
 static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
 {
-       if (n == (instr->regs_count + 1))
-               return instr->fanin;
        if (n == (instr->regs_count + 0))
                return instr->address;
        return ssa(instr->regs[n]);
@@ -834,8 +858,8 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr
 /* iterator for an instruction's SSA sources (instr), also returns src #: */
 #define foreach_ssa_src_n(__srcinst, __n, __instr) \
        if ((__instr)->regs_count) \
-               for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
-                       if ((__srcinst = __ssa_src_n(__instr, __n + 1)))
+               for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \
+                       if ((__srcinst = __ssa_src_n(__instr, __n)))
 
 /* iterator for an instruction's SSA sources (instr): */
 #define foreach_ssa_src(__srcinst, __instr) \
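Note: with instr->fanin gone, foreach_ssa_src_n() now starts at slot 0
rather than 1.  __ssa_src_n(instr, 0) is ssa(instr->regs[0]), which for a
dst register yields the previous array access when IR3_REG_ARRAY is set,
so for array writes the ordering dependency shows up as src #0.  Passes
that only care about true data sources skip that slot, which is what the
ir3_depth.c and ir3_sched.c hunks below do; roughly (sketch, mirroring the
pattern used in the patch):

	foreach_ssa_src_n(src, i, instr) {
		/* slot 0 is the dst; for array writes it points at the
		 * previous access, which only orders the write and is
		 * not a value this instruction has to wait on:
		 */
		if (i == 0)
			continue;
		/* ... handle the real data sources ... */
	}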
@@ -878,7 +902,15 @@ ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
        struct ir3_instruction *instr =
                ir3_instr_create(block, 1, 0);
        ir3_reg_create(instr, 0, 0);   /* dst */
-       ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+       if (src->regs[0]->flags & IR3_REG_ARRAY) {
+               struct ir3_register *src_reg =
+                       ir3_reg_create(instr, 0, IR3_REG_ARRAY);
+               src_reg->array = src->regs[0]->array;
+               src_reg->instr = src;
+       } else {
+               ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+       }
+       debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV));
        instr->cat1.src_type = type;
        instr->cat1.dst_type = type;
        return instr;
@@ -894,6 +926,7 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
        ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
        instr->cat1.src_type = src_type;
        instr->cat1.dst_type = dst_type;
+       debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY));
        return instr;
 }
 
@@ -1083,7 +1116,7 @@ typedef uint8_t regmask_t[2 * MAX_REG / 8];
 
 static inline unsigned regmask_idx(struct ir3_register *reg)
 {
-       unsigned num = reg->num;
+       unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num;
        debug_assert(num < MAX_REG);
        if (reg->flags & IR3_REG_HALF)
                num += MAX_REG;
index e5d3909..bd0ee89 100644 (file)
@@ -74,8 +74,6 @@ struct ir3_compile {
        /* mapping from nir_register to defining instruction: */
        struct hash_table *def_ht;
 
-       /* mapping from nir_variable to ir3_array: */
-       struct hash_table *var_ht;
        unsigned num_arrays;
 
        /* a common pattern for indirect addressing is to request the
@@ -142,8 +140,6 @@ compile_init(struct ir3_compiler *compiler,
        ctx->so = so;
        ctx->def_ht = _mesa_hash_table_create(ctx,
                        _mesa_hash_pointer, _mesa_key_pointer_equal);
-       ctx->var_ht = _mesa_hash_table_create(ctx,
-                       _mesa_hash_pointer, _mesa_key_pointer_equal);
        ctx->block_ht = _mesa_hash_table_create(ctx,
                        _mesa_hash_pointer, _mesa_key_pointer_equal);
 
@@ -220,206 +216,26 @@ compile_free(struct ir3_compile *ctx)
        ralloc_free(ctx);
 }
 
-/* global per-array information: */
-struct ir3_array {
-       unsigned length, aid;
-};
-
-/* per-block array state: */
-struct ir3_array_value {
-       /* TODO drop length/aid, and just have ptr back to ir3_array */
-       unsigned length, aid;
-       /* initial array element values are phi's, other than for the
-        * entry block.  The phi src's get added later in a resolve step
-        * after we have visited all the blocks, to account for back
-        * edges in the cfg.
-        */
-       struct ir3_instruction **phis;
-       /* current array element values (as block is processed).  When
-        * the array phi's are resolved, it will contain the array state
-        * at exit of block, so successor blocks can use it to add their
-        * phi srcs.
-        */
-       struct ir3_instruction *arr[];
-};
-
-/* track array assignments per basic block.  When an array is read
- * outside of the same basic block, we can use NIR's dominance-frontier
- * information to figure out where phi nodes are needed.
- */
-struct ir3_nir_block_data {
-       unsigned foo;
-       /* indexed by array-id (aid): */
-       struct ir3_array_value *arrs[];
-};
-
-static struct ir3_nir_block_data *
-get_block_data(struct ir3_compile *ctx, struct ir3_block *block)
-{
-       if (!block->data) {
-               struct ir3_nir_block_data *bd = ralloc_size(ctx, sizeof(*bd) +
-                               ((ctx->num_arrays + 1) * sizeof(bd->arrs[0])));
-               block->data = bd;
-       }
-       return block->data;
-}
-
 static void
 declare_var(struct ir3_compile *ctx, nir_variable *var)
 {
        unsigned length = glsl_get_length(var->type) * 4;  /* always vec4, at least with ttn */
        struct ir3_array *arr = ralloc(ctx, struct ir3_array);
+       arr->id = ++ctx->num_arrays;
        arr->length = length;
-       arr->aid = ++ctx->num_arrays;
-       _mesa_hash_table_insert(ctx->var_ht, var, arr);
+       arr->var = var;
+       list_addtail(&arr->node, &ctx->ir->array_list);
 }
 
-static nir_block *
-nir_block_pred(nir_block *block)
-{
-       assert(block->predecessors->entries < 2);
-       if (block->predecessors->entries == 0)
-               return NULL;
-       return (nir_block *)_mesa_set_next_entry(block->predecessors, NULL)->key;
-}
-
-static struct ir3_array_value *
+static struct ir3_array *
 get_var(struct ir3_compile *ctx, nir_variable *var)
 {
-       struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var);
-       struct ir3_block *block = ctx->block;
-       struct ir3_nir_block_data *bd = get_block_data(ctx, block);
-       struct ir3_array *arr = entry->data;
-
-       if (!bd->arrs[arr->aid]) {
-               struct ir3_array_value *av = ralloc_size(bd, sizeof(*av) +
-                               (arr->length * sizeof(av->arr[0])));
-               struct ir3_array_value *defn = NULL;
-               nir_block *pred_block;
-
-               av->length = arr->length;
-               av->aid = arr->aid;
-
-               /* For loops, we have to consider that we have not visited some
-                * of the blocks who should feed into the phi (ie. back-edges in
-                * the cfg).. for example:
-                *
-                *   loop {
-                *      block { load_var; ... }
-                *      if then block {} else block {}
-                *      block { store_var; ... }
-                *      if then block {} else block {}
-                *      block {...}
-                *   }
-                *
-                * We can skip the phi if we can chase the block predecessors
-                * until finding the block previously defining the array without
-                * crossing a block that has more than one predecessor.
-                *
-                * Otherwise create phi's and resolve them as a post-pass after
-                * all the blocks have been visited (to handle back-edges).
-                */
-
-               for (pred_block = block->nblock;
-                               pred_block && (pred_block->predecessors->entries < 2) && !defn;
-                               pred_block = nir_block_pred(pred_block)) {
-                       struct ir3_block *pblock = get_block(ctx, pred_block);
-                       struct ir3_nir_block_data *pbd = pblock->data;
-                       if (!pbd)
-                               continue;
-                       defn = pbd->arrs[arr->aid];
-               }
-
-               if (defn) {
-                       /* only one possible definer: */
-                       for (unsigned i = 0; i < arr->length; i++)
-                               av->arr[i] = defn->arr[i];
-               } else if (pred_block) {
-                       /* not the first block, and multiple potential definers: */
-                       av->phis = ralloc_size(av, arr->length * sizeof(av->phis[0]));
-
-                       for (unsigned i = 0; i < arr->length; i++) {
-                               struct ir3_instruction *phi;
-
-                               phi = ir3_instr_create2(block, -1, OPC_META_PHI,
-                                               1 + ctx->impl->num_blocks);
-                               ir3_reg_create(phi, 0, 0);         /* dst */
-
-                               /* phi's should go at head of block: */
-                               list_delinit(&phi->node);
-                               list_add(&phi->node, &block->instr_list);
-
-                               av->phis[i] = av->arr[i] = phi;
-                       }
-               } else {
-                       /* Some shaders end up reading array elements without
-                        * first writing.. so initialize things to prevent null
-                        * instr ptrs later:
-                        */
-                       for (unsigned i = 0; i < arr->length; i++)
-                               av->arr[i] = create_immed(block, 0);
-               }
-
-               bd->arrs[arr->aid] = av;
-       }
-
-       return bd->arrs[arr->aid];
-}
-
-static void
-add_array_phi_srcs(struct ir3_compile *ctx, nir_block *nblock,
-               struct ir3_array_value *av, BITSET_WORD *visited)
-{
-       struct ir3_block *block;
-       struct ir3_nir_block_data *bd;
-
-       if (BITSET_TEST(visited, nblock->index))
-               return;
-
-       BITSET_SET(visited, nblock->index);
-
-       block = get_block(ctx, nblock);
-       bd = block->data;
-
-       if (bd && bd->arrs[av->aid]) {
-               struct ir3_array_value *dav = bd->arrs[av->aid];
-               for (unsigned i = 0; i < av->length; i++) {
-                       ir3_reg_create(av->phis[i], 0, IR3_REG_SSA)->instr =
-                                       dav->arr[i];
-               }
-       } else {
-               /* didn't find defn, recurse predecessors: */
-               struct set_entry *entry;
-               set_foreach(nblock->predecessors, entry) {
-                       add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
-               }
-       }
-}
-
-static void
-resolve_array_phis(struct ir3_compile *ctx, struct ir3_block *block)
-{
-       struct ir3_nir_block_data *bd = block->data;
-       unsigned bitset_words = BITSET_WORDS(ctx->impl->num_blocks);
-
-       if (!bd)
-               return;
-
-       /* TODO use nir dom_frontier to help us with this? */
-
-       for (unsigned i = 1; i <= ctx->num_arrays; i++) {
-               struct ir3_array_value *av = bd->arrs[i];
-               BITSET_WORD visited[bitset_words];
-               struct set_entry *entry;
-
-               if (!(av && av->phis))
-                       continue;
-
-               memset(visited, 0, sizeof(visited));
-               set_foreach(block->nblock->predecessors, entry) {
-                       add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
-               }
+       list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+               if (arr->var == var)
+                       return arr;
        }
+       compile_error(ctx, "bogus var: %s\n", var->name);
+       return NULL;
 }
 
 /* allocate a n element value array (to be populated by caller) and
@@ -437,6 +253,7 @@ __get_dst(struct ir3_compile *ctx, void *key, unsigned n)
 static struct ir3_instruction **
 get_dst(struct ir3_compile *ctx, nir_dest *dst, unsigned n)
 {
+       compile_assert(ctx, dst->is_ssa);
        if (dst->is_ssa) {
                return __get_dst(ctx, &dst->ssa, n);
        } else {
@@ -454,6 +271,7 @@ static struct ir3_instruction **
 get_src(struct ir3_compile *ctx, nir_src *src)
 {
        struct hash_entry *entry;
+       compile_assert(ctx, src->is_ssa);
        if (src->is_ssa) {
                entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
        } else {
@@ -568,7 +386,7 @@ create_uniform_indirect(struct ir3_compile *ctx, unsigned n,
        mov->cat1.src_type = TYPE_U32;
        mov->cat1.dst_type = TYPE_U32;
        ir3_reg_create(mov, 0, 0);
-       ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV);
+       ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
 
        ir3_instr_set_address(mov, address);
 
@@ -607,17 +425,45 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
        src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV);
        src->instr = collect;
        src->size  = arrsz;
-       src->offset = n;
+       src->array.offset = n;
 
        ir3_instr_set_address(mov, address);
 
        return mov;
 }
 
+/* relative (indirect) if address!=NULL */
+static struct ir3_instruction *
+create_var_load(struct ir3_compile *ctx, struct ir3_array *arr, unsigned n,
+               struct ir3_instruction *address)
+{
+       struct ir3_block *block = ctx->block;
+       struct ir3_instruction *mov;
+       struct ir3_register *src;
+
+       mov = ir3_instr_create(block, 1, 0);
+       mov->cat1.src_type = TYPE_U32;
+       mov->cat1.dst_type = TYPE_U32;
+       ir3_reg_create(mov, 0, 0);
+       src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
+                       COND(address, IR3_REG_RELATIV));
+       src->instr = arr->last_write;
+       src->size  = arr->length;
+       src->array.id = arr->id;
+       src->array.offset = n;
+
+       if (address)
+               ir3_instr_set_address(mov, address);
+
+       arr->last_access = mov;
+
+       return mov;
+}
+
+/* relative (indirect) if address!=NULL */
 static struct ir3_instruction *
-create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
-               struct ir3_instruction *src, struct ir3_instruction *address,
-               struct ir3_instruction *collect)
+create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, unsigned n,
+               struct ir3_instruction *src, struct ir3_instruction *address)
 {
        struct ir3_block *block = ctx->block;
        struct ir3_instruction *mov;
@@ -626,14 +472,18 @@ create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
        mov = ir3_instr_create(block, 1, 0);
        mov->cat1.src_type = TYPE_U32;
        mov->cat1.dst_type = TYPE_U32;
-       dst = ir3_reg_create(mov, 0, IR3_REG_RELATIV);
-       dst->size  = arrsz;
-       dst->offset = n;
+       dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
+                       COND(address, IR3_REG_RELATIV));
+       dst->instr = arr->last_access;
+       dst->size  = arr->length;
+       dst->array.id = arr->id;
+       dst->array.offset = n;
        ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
-       mov->fanin = collect;
 
        ir3_instr_set_address(mov, address);
 
+       arr->last_write = arr->last_access = mov;
+
        return mov;
 }
 
@@ -1198,7 +1048,7 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
 {
        nir_deref_var *dvar = intr->variables[0];
        nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
-       struct ir3_array_value *arr = get_var(ctx, dvar->var);
+       struct ir3_array *arr = get_var(ctx, dvar->var);
 
        compile_assert(ctx, dvar->deref.child &&
                (dvar->deref.child->deref_type == nir_deref_type_array));
@@ -1209,19 +1059,17 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
                for (int i = 0; i < intr->num_components; i++) {
                        unsigned n = darr->base_offset * 4 + i;
                        compile_assert(ctx, n < arr->length);
-                       dst[i] = arr->arr[n];
+                       dst[i] = create_var_load(ctx, arr, n, NULL);
                }
                break;
        case nir_deref_array_type_indirect: {
                /* for indirect, we need to collect all the array elements: */
-               struct ir3_instruction *collect =
-                               create_collect(ctx->block, arr->arr, arr->length);
                struct ir3_instruction *addr =
                                get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
                for (int i = 0; i < intr->num_components; i++) {
                        unsigned n = darr->base_offset * 4 + i;
                        compile_assert(ctx, n < arr->length);
-                       dst[i] = create_indirect_load(ctx, arr->length, n, addr, collect);
+                       dst[i] = create_var_load(ctx, arr, n, addr);
                }
                break;
        }
@@ -1238,8 +1086,9 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 {
        nir_deref_var *dvar = intr->variables[0];
        nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
-       struct ir3_array_value *arr = get_var(ctx, dvar->var);
-       struct ir3_instruction **src;
+       struct ir3_array *arr = get_var(ctx, dvar->var);
+       struct ir3_instruction *addr, **src;
+       unsigned wrmask = intr->const_index[0];
 
        compile_assert(ctx, dvar->deref.child &&
                (dvar->deref.child->deref_type == nir_deref_type_array));
@@ -1248,66 +1097,24 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 
        switch (darr->deref_array_type) {
        case nir_deref_array_type_direct:
-               /* direct access does not require anything special: */
-               for (int i = 0; i < intr->num_components; i++) {
-                       /* ttn doesn't generate partial writemasks */
-                       assert(intr->const_index[0] ==
-                              (1 << intr->num_components) - 1);
-
-                       unsigned n = darr->base_offset * 4 + i;
-                       compile_assert(ctx, n < arr->length);
-                       arr->arr[n] = src[i];
-               }
+               addr = NULL;
                break;
-       case nir_deref_array_type_indirect: {
-               /* for indirect, create indirect-store and fan that out: */
-               struct ir3_instruction *collect =
-                               create_collect(ctx->block, arr->arr, arr->length);
-               struct ir3_instruction *addr =
-                               get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
-               for (int i = 0; i < intr->num_components; i++) {
-                       /* ttn doesn't generate partial writemasks */
-                       assert(intr->const_index[0] ==
-                              (1 << intr->num_components) - 1);
-
-                       struct ir3_instruction *store;
-                       unsigned n = darr->base_offset * 4 + i;
-                       compile_assert(ctx, n < arr->length);
-
-                       store = create_indirect_store(ctx, arr->length,
-                                       n, src[i], addr, collect);
-
-                       store->fanin->fi.aid = arr->aid;
-
-                       /* TODO: probably split this out to be used for
-                        * store_output_indirect? or move this into
-                        * create_indirect_store()?
-                        */
-                       for (int j = i; j < arr->length; j += intr->num_components) {
-                               struct ir3_instruction *split;
-
-                               split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
-                               split->fo.off = j;
-                               ir3_reg_create(split, 0, 0);
-                               ir3_reg_create(split, 0, IR3_REG_SSA)->instr = store;
-
-                               arr->arr[j] = split;
-                       }
-               }
-               /* fixup fanout/split neighbors: */
-               for (int i = 0; i < arr->length; i++) {
-                       arr->arr[i]->cp.right = (i < (arr->length - 1)) ?
-                                       arr->arr[i+1] : NULL;
-                       arr->arr[i]->cp.left = (i > 0) ?
-                                       arr->arr[i-1] : NULL;
-               }
+       case nir_deref_array_type_indirect:
+               addr = get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
                break;
-       }
        default:
                compile_error(ctx, "Unhandled store deref type: %u\n",
                                darr->deref_array_type);
                break;
        }
+
+       for (int i = 0; i < intr->num_components; i++) {
+               if (!(wrmask & (1 << i)))
+                       continue;
+               unsigned n = darr->base_offset * 4 + i;
+               compile_assert(ctx, n < arr->length);
+               create_var_store(ctx, arr, n, src[i], addr);
+       }
 }
 
 static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
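Taken together, create_var_load()/create_var_store() replace the old
per-block array-value tracking with plain movs whose array registers carry
the ordering: a load's src points at arr->last_write, a store's dst points
at arr->last_access, and the store then becomes both.  As a rough
illustration (not taken from the patch), for something like

	arr[0] = a;
	arr[1] = b;
	x = arr[i];	/* indirect read */

the second store depends on the first via its dst reg, and the indirect
load depends on the second store via its src reg, so writes stay ordered
against each other and against later reads, while independent reads of the
array remain free to be reordered among themselves.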
@@ -1835,8 +1642,6 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
                        ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
                }
        }
-
-       resolve_array_phis(ctx, block);
 }
 
 static void
index a6e69d2..0d88e7b 100644 (file)
@@ -202,6 +202,7 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags)
        *dstflags |= srcflags & IR3_REG_CONST;
        *dstflags |= srcflags & IR3_REG_IMMED;
        *dstflags |= srcflags & IR3_REG_RELATIV;
+       *dstflags |= srcflags & IR3_REG_ARRAY;
 }
 
 /* the "plain" MAD's (ie. the ones that don't shift first src prior to
@@ -233,6 +234,10 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
                combine_flags(&new_flags, src_reg->flags);
 
                if (valid_flags(instr, n, new_flags)) {
+                       if (new_flags & IR3_REG_ARRAY) {
+                               debug_assert(!(reg->flags & IR3_REG_ARRAY));
+                               reg->array = src_reg->array;
+                       }
                        reg->flags = new_flags;
                        reg->instr = ssa(src_reg);
                }
@@ -283,6 +288,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
                                        conflicts(instr->address, reg->instr->address))
                                return;
 
+                       src_reg = ir3_reg_clone(instr->block->shader, src_reg);
                        src_reg->flags = new_flags;
                        instr->regs[n+1] = src_reg;
 
@@ -294,6 +300,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
 
                if ((src_reg->flags & IR3_REG_RELATIV) &&
                                !conflicts(instr->address, reg->instr->address)) {
+                       src_reg = ir3_reg_clone(instr->block->shader, src_reg);
                        src_reg->flags = new_flags;
                        instr->regs[n+1] = src_reg;
                        ir3_instr_set_address(instr, reg->instr->address);
@@ -329,6 +336,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
                        /* other than category 1 (mov) we can only encode up to 10 bits: */
                        if ((instr->category == 1) || !(iim_val & ~0x3ff)) {
                                new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
+                               src_reg = ir3_reg_clone(instr->block->shader, src_reg);
                                src_reg->flags = new_flags;
                                src_reg->iim_val = iim_val;
                                instr->regs[n+1] = src_reg;
@@ -349,9 +357,11 @@ eliminate_output_mov(struct ir3_instruction *instr)
 {
        if (is_eligible_mov(instr, false)) {
                struct ir3_register *reg = instr->regs[1];
-               struct ir3_instruction *src_instr = ssa(reg);
-               debug_assert(src_instr);
-               return src_instr;
+               if (!(reg->flags & IR3_REG_ARRAY)) {
+                       struct ir3_instruction *src_instr = ssa(reg);
+                       debug_assert(src_instr);
+                       return src_instr;
+               }
        }
        return instr;
 }
@@ -379,9 +389,22 @@ instr_cp(struct ir3_instruction *instr)
                        continue;
 
                instr_cp(src);
+
+               /* TODO for non-indirect access we could figure out which register
+                * we actually want and allow cp..
+                */
+               if (reg->flags & IR3_REG_ARRAY)
+                       continue;
+
                reg_cp(instr, reg, n);
        }
 
+       if (instr->regs[0]->flags & IR3_REG_ARRAY) {
+               struct ir3_instruction *src = ssa(instr->regs[0]);
+               if (src)
+                       instr_cp(src);
+       }
+
        if (instr->address) {
                instr_cp(instr->address);
                ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
index 4bbc045..3354cbd 100644 (file)
@@ -118,6 +118,10 @@ ir3_instr_depth(struct ir3_instruction *instr)
                /* visit child to compute its depth: */
                ir3_instr_depth(src);
 
+               /* for array writes, no need to delay on previous write: */
+               if (i == 0)
+                       continue;
+
                sd = ir3_delayslots(src, instr, i) + src->depth;
 
                instr->depth = MAX2(instr->depth, sd);
index a84e798..ec832f5 100644 (file)
@@ -94,7 +94,7 @@ static void print_instr_name(struct ir3_instruction *instr)
        }
 }
 
-static void print_reg_name(struct ir3_register *reg, bool followssa)
+static void print_reg_name(struct ir3_register *reg)
 {
        if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
                        (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
@@ -106,20 +106,29 @@ static void print_reg_name(struct ir3_register *reg, bool followssa)
 
        if (reg->flags & IR3_REG_IMMED) {
                printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
-       } else if (reg->flags & IR3_REG_SSA) {
-               printf("_");
-               if (followssa) {
-                       printf("[");
+       } else if (reg->flags & IR3_REG_ARRAY) {
+               printf("arr[id=%u, offset=%u, size=%u", reg->array.id,
+                               reg->array.offset, reg->size);
+               /* for ARRAY we could have null src, for example the first write
+                * instruction..
+                */
+               if (reg->instr) {
+                       printf(", _[");
                        print_instr_name(reg->instr);
                        printf("]");
                }
+               printf("]");
+       } else if (reg->flags & IR3_REG_SSA) {
+               printf("_[");
+               print_instr_name(reg->instr);
+               printf("]");
        } else if (reg->flags & IR3_REG_RELATIV) {
                if (reg->flags & IR3_REG_HALF)
                        printf("h");
                if (reg->flags & IR3_REG_CONST)
-                       printf("c<a0.x + %u>", reg->num);
+                       printf("c<a0.x + %u>", reg->array.offset);
                else
-                       printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);
+                       printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->array.offset, reg->size);
        } else {
                if (reg->flags & IR3_REG_HALF)
                        printf("h");
@@ -158,7 +167,7 @@ print_instr(struct ir3_instruction *instr, int lvl)
        for (i = 0; i < instr->regs_count; i++) {
                struct ir3_register *reg = instr->regs[i];
                printf(i ? ", " : " ");
-               print_reg_name(reg, !!i);
+               print_reg_name(reg);
        }
 
        if (instr->address) {
@@ -168,13 +177,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
                printf("]");
        }
 
-       if (instr->fanin) {
-               printf(", fanin=_");
-               printf("[");
-               print_instr_name(instr->fanin);
-               printf("]");
-       }
-
        if (instr->cp.left) {
                printf(", left=_");
                printf("[");
@@ -192,8 +194,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
        if (is_meta(instr)) {
                if (instr->opc == OPC_META_FO) {
                        printf(", off=%d", instr->fo.off);
-               } else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
-                       printf(", aid=%d", instr->fi.aid);
                }
        }
 
index 88ca95a..3c42f8e 100644 (file)
  * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after
  * register assignment.  But for us that is horrible from a scheduling
  * standpoint.  Instead what we do is use idea of 'definer' instruction.
- * Ie. the first instruction (lowest ip) to write to the array is the
+ * Ie. the first instruction (lowest ip) to write to the variable is the
  * one we consider from use/def perspective when building interference
- * graph.  (Other instructions which write other array elements just
- * define the variable some more.)
+ * graph.  (Other instructions which write other variable components
+ * just define the variable some more.)
+ *
+ * Arrays of arbitrary size are handled via pre-coloring a consecutive
+ * sequence of registers.  Additional scalar (single component) reg
+ * names are allocated starting at ctx->class_base[total_class_count]
+ * (see arr->base), which are pre-colored.  In the use/def graph direct
+ * access is treated as a single element use/def, and indirect access
+ * is treated as use or def of all array elements.  (Only the first
+ * def is tracked, in case of multiple indirect writes, etc.)
  */
 
 static const unsigned class_sizes[] = {
        1, 2, 3, 4,
        4 + 4, /* txd + 1d/2d */
        4 + 6, /* txd + 3d */
-       /* temporary: until we can assign arrays, create classes so we
-        * can round up array to fit.  NOTE with tgsi arrays should
-        * really all be multiples of four:
-        */
-       4 * 4,
-       4 * 8,
-       4 * 16,
-       4 * 32,
-
 };
 #define class_count ARRAY_SIZE(class_sizes)
 
@@ -265,8 +264,9 @@ struct ir3_ra_ctx {
        struct ir3_ra_reg_set *set;
        struct ra_graph *g;
        unsigned alloc_count;
-       unsigned class_alloc_count[total_class_count];
-       unsigned class_base[total_class_count];
+       /* one per class, plus one slot for arrays: */
+       unsigned class_alloc_count[total_class_count + 1];
+       unsigned class_base[total_class_count + 1];
        unsigned instr_cnt;
        unsigned *def, *use;     /* def/use table */
        struct ir3_ra_instr_data *instrd;
@@ -329,9 +329,6 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
        struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
        struct ir3_instruction *d = NULL;
 
-       if (instr->fanin)
-               return get_definer(ctx, instr->fanin, sz, off);
-
        if (id->defn) {
                *sz = id->sz;
                *off = id->off;
@@ -485,10 +482,13 @@ ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                /* couple special cases: */
                if (writes_addr(instr) || writes_pred(instr)) {
                        id->cls = -1;
-                       continue;
+               } else if (instr->regs[0]->flags & IR3_REG_ARRAY) {
+                       id->cls = total_class_count;
+                       id->defn = instr;
+               } else {
+                       id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+                       id->cls = size_to_class(id->sz, is_half(id->defn));
                }
-               id->defn = get_definer(ctx, instr, &id->sz, &id->off);
-               id->cls = size_to_class(id->sz, is_half(id->defn));
        }
 }
 
@@ -518,8 +518,6 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
                /* arrays which don't fit in one of the pre-defined class
                 * sizes are pre-colored:
-                *
-                * TODO but we still need to allocate names for them, don't we??
                 */
                if (id->cls >= 0) {
                        instr->name = ctx->class_alloc_count[id->cls]++;
@@ -531,7 +529,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 static void
 ra_init(struct ir3_ra_ctx *ctx)
 {
-       unsigned n;
+       unsigned n, base;
 
        ir3_clear_mark(ctx->ir);
        n = ir3_count_instructions(ctx->ir);
@@ -550,11 +548,20 @@ ra_init(struct ir3_ra_ctx *ctx)
         * actual ra name is class_base[cls] + instr->name;
         */
        ctx->class_base[0] = 0;
-       for (unsigned i = 1; i < total_class_count; i++) {
+       for (unsigned i = 1; i <= total_class_count; i++) {
                ctx->class_base[i] = ctx->class_base[i-1] +
                                ctx->class_alloc_count[i-1];
        }
 
+       /* and vreg names for array elements: */
+       base = ctx->class_base[total_class_count];
+       list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+               arr->base = base;
+               ctx->class_alloc_count[total_class_count] += arr->length;
+               base += arr->length;
+       }
+       ctx->alloc_count += ctx->class_alloc_count[total_class_count];
+
        ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
        ralloc_steal(ctx->g, ctx->instrd);
        ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@@ -566,6 +573,7 @@ __ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
 {
        unsigned name;
        debug_assert(cls >= 0);
+       debug_assert(cls < total_class_count);  /* we shouldn't get arrays here.. */
        name = ctx->class_base[cls] + defn->name;
        debug_assert(name < ctx->alloc_count);
        return name;
@@ -590,6 +598,22 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
        struct ir3_ra_block_data *bd;
        unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
 
+       void def(unsigned name, struct ir3_instruction *instr)
+       {
+               /* defined on first write: */
+               if (!ctx->def[name])
+                       ctx->def[name] = instr->ip;
+               ctx->use[name] = instr->ip;
+               BITSET_SET(bd->def, name);
+       }
+
+       void use(unsigned name, struct ir3_instruction *instr)
+       {
+               ctx->use[name] = MAX2(ctx->use[name], instr->ip);
+               if (!BITSET_TEST(bd->def, name))
+                       BITSET_SET(bd->use, name);
+       }
+
        bd = rzalloc(ctx->g, struct ir3_ra_block_data);
 
        bd->def     = rzalloc_array(bd, BITSET_WORD, bitset_words);
@@ -601,6 +625,7 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
        list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
                struct ir3_instruction *src;
+               struct ir3_register *reg;
 
                if (instr->regs_count == 0)
                        continue;
@@ -632,17 +657,45 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
                if (writes_gpr(instr)) {
                        struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+                       struct ir3_register *dst = instr->regs[0];
 
-                       if (id->defn == instr) {
-                               unsigned name = ra_name(ctx, id);
+                       if (dst->flags & IR3_REG_ARRAY) {
+                               struct ir3_array *arr =
+                                       ir3_lookup_array(ctx->ir, dst->array.id);
+                               unsigned i;
+
+                               debug_assert(!(dst->flags & IR3_REG_PHI_SRC));
+
+                               /* set the node class now.. in case we don't encounter
+                                * this array dst again.  From register_alloc algo's
+                                * perspective, these are all single/scalar regs:
+                                */
+                               for (i = 0; i < arr->length; i++) {
+                                       unsigned name = arr->base + i;
+                                       ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
+                               }
+
+                               /* indirect write is treated like a write to all array
+                                * elements, since we don't know which one is actually
+                                * written:
+                                */
+                               if (dst->flags & IR3_REG_RELATIV) {
+                                       for (i = 0; i < arr->length; i++) {
+                                               unsigned name = arr->base + i;
+                                               def(name, instr);
+                                       }
+                               } else {
+                                       unsigned name = arr->base + dst->array.offset;
+                                       def(name, instr);
+                               }
 
-                               ctx->def[name] = id->defn->ip;
-                               ctx->use[name] = id->defn->ip;
+                       } else if (id->defn == instr) {
+                               unsigned name = ra_name(ctx, id);
 
                                /* since we are in SSA at this point: */
                                debug_assert(!BITSET_TEST(bd->use, name));
 
-                               BITSET_SET(bd->def, name);
+                               def(name, id->defn);
 
                                if (is_half(id->defn)) {
                                        ra_set_node_class(ctx->g, name,
@@ -672,12 +725,28 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                        }
                }
 
-               foreach_ssa_src(src, instr) {
-                       if (writes_gpr(src)) {
+               foreach_src(reg, instr) {
+                       if (reg->flags & IR3_REG_ARRAY) {
+                               struct ir3_array *arr =
+                                       ir3_lookup_array(ctx->ir, reg->array.id);
+                               /* indirect read is treated like a read from all array
+                                * elements, since we don't know which one is actually
+                                * read:
+                                */
+                               if (reg->flags & IR3_REG_RELATIV) {
+                                       unsigned i;
+                                       for (i = 0; i < arr->length; i++) {
+                                               unsigned name = arr->base + i;
+                                               use(name, instr);
+                                       }
+                               } else {
+                                       unsigned name = arr->base + reg->array.offset;
+                                       use(name, instr);
+                                       debug_assert(reg->array.offset < arr->length);
+                               }
+                       } else if ((src = ssa(reg)) && writes_gpr(src)) {
                                unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
-                               ctx->use[name] = MAX2(ctx->use[name], instr->ip);
-                               if (!BITSET_TEST(bd->def, name))
-                                       BITSET_SET(bd->use, name);
+                               use(name, instr);
                        }
                }
        }
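The def()/use() helpers above are nested functions (a GCC C extension)
capturing ctx and bd, so the array cases can share the same live-range
bookkeeping as ordinary SSA values: a direct array access touches a single
vreg name (arr->base + offset), an indirect access touches every element.
Roughly, for illustration only:

	/* direct write to element 2 of the array: */
	def(arr->base + 2, instr);		/* defines a single name */

	/* indirect read somewhere later in the block: */
	for (i = 0; i < arr->length; i++)
		use(arr->base + i, instr);	/* keeps every element live
						 * up to this point */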
@@ -830,18 +899,36 @@ static void fixup_half_instr_src(struct ir3_instruction *instr)
        }
 }
 
+/* NOTE: instr could be NULL for the IR3_REG_ARRAY case, for the first
+ * array access(es) which do not have any previous access to depend
+ * on from a scheduling point of view.
+ */
 static void
 reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
                struct ir3_instruction *instr)
 {
-       struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
-       if (id->defn) {
+       struct ir3_ra_instr_data *id;
+
+       if (reg->flags & IR3_REG_ARRAY) {
+               struct ir3_array *arr =
+                       ir3_lookup_array(ctx->ir, reg->array.id);
+               unsigned name = arr->base + reg->array.offset;
+               unsigned r = ra_get_node_reg(ctx->g, name);
+               unsigned num = ctx->set->ra_reg_to_gpr[r];
+
+               if (reg->flags & IR3_REG_RELATIV) {
+                       reg->array.offset = num;
+               } else {
+                       reg->num = num;
+               }
+
+               reg->flags &= ~IR3_REG_ARRAY;
+       } else if ((id = &ctx->instrd[instr->ip]) && id->defn) {
                unsigned name = ra_name(ctx, id);
                unsigned r = ra_get_node_reg(ctx->g, name);
                unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
 
-               if (reg->flags & IR3_REG_RELATIV)
-                       num += reg->offset;
+               debug_assert(!(reg->flags & IR3_REG_RELATIV));
 
                reg->num = num;
                reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
@@ -868,9 +955,9 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
                foreach_src_n(reg, n, instr) {
                        struct ir3_instruction *src = reg->instr;
-                       if (!src)
+                       /* Note: reg->instr could be null for IR3_REG_ARRAY */
+                       if (!(src || (reg->flags & IR3_REG_ARRAY)))
                                continue;
-
                        reg_assign(ctx, instr->regs[n+1], src);
                        if (instr->regs[n+1]->flags & IR3_REG_HALF)
                                fixup_half_instr_src(instr);
@@ -881,6 +968,8 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 static int
 ra_alloc(struct ir3_ra_ctx *ctx)
 {
+       unsigned n = 0;
+
        /* frag shader inputs get pre-assigned, since we have some
         * constraints/unknowns about setup for some of these regs:
         */
@@ -898,7 +987,8 @@ ra_alloc(struct ir3_ra_ctx *ctx)
                        i += 4;
                }
 
-               for (j = 0; i < ir->ninputs; i++) {
+               j = 0;
+               for (; i < ir->ninputs; i++) {
                        struct ir3_instruction *instr = ir->inputs[i];
                        if (instr) {
                                struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
@@ -914,6 +1004,24 @@ ra_alloc(struct ir3_ra_ctx *ctx)
                                }
                        }
                }
+               n = j;
+       }
+
+       /* pre-assign array elements:
+        * TODO we could be a bit more clever if we knew which arrays didn't
+        * fully (partially?) conflict with each other..
+        */
+       list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+               unsigned i;
+               for (i = 0; i < arr->length; i++) {
+                       unsigned name, reg;
+
+                       name = arr->base + i;
+                       reg = ctx->set->gpr_to_ra_reg[0][n++];
+
+                       ra_set_node_reg(ctx->g, name, reg);
+
+               }
        }
 
        if (!ra_allocate(ctx->g))
index 6aaa16e..8f640fe 100644 (file)
@@ -187,6 +187,9 @@ delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 
        foreach_ssa_src_n(src, i, instr) {
                unsigned d;
+               /* for array writes, no need to delay on previous write: */
+               if (i == 0)
+                       continue;
                if (src->block != instr->block)
                        continue;
                d = delay_calc_srcn(ctx, src, instr, i);