const unsigned primitive_map = const_state->offsets.primitive_map * 4;
switch (intr->intrinsic) {
+ case nir_intrinsic_decl_reg:
+ /* There's logically nothing to do, but this has a destination in NIR so
+ * plug in something... It will get DCE'd.
+ */
+ dst[0] = create_immed(ctx->block, 0);
+ break;
+
+ case nir_intrinsic_load_reg:
+ case nir_intrinsic_load_reg_indirect: {
+ struct ir3_array *arr = ir3_get_array(ctx, intr->src[0].ssa);
+ struct ir3_instruction *addr = NULL;
+
+ if (intr->intrinsic == nir_intrinsic_load_reg_indirect) {
+ addr = ir3_get_addr0(ctx, ir3_get_src(ctx, &intr->src[1])[0],
+ dest_components);
+ }
+
+ ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[0].ssa);
+ assert(dest_components == nir_intrinsic_num_components(decl));
+
+ for (unsigned i = 0; i < dest_components; i++) {
+ unsigned n = nir_intrinsic_base(intr) * dest_components + i;
+ compile_assert(ctx, n < arr->length);
+ dst[i] = ir3_create_array_load(ctx, arr, n, addr);
+ }
+
+ break;
+ }
+
+ case nir_intrinsic_store_reg:
+ case nir_intrinsic_store_reg_indirect: {
+ struct ir3_array *arr = ir3_get_array(ctx, intr->src[1].ssa);
+ unsigned num_components = nir_src_num_components(intr->src[0]);
+ struct ir3_instruction *addr = NULL;
+
+ ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[1].ssa);
+ assert(num_components == nir_intrinsic_num_components(decl));
+
+ struct ir3_instruction *const *value = ir3_get_src(ctx, &intr->src[0]);
+
+ if (intr->intrinsic == nir_intrinsic_store_reg_indirect) {
+ addr = ir3_get_addr0(ctx, ir3_get_src(ctx, &intr->src[2])[0],
+ num_components);
+ }
+
+ u_foreach_bit(i, nir_intrinsic_write_mask(intr)) {
+ assert(i < num_components);
+
+ unsigned n = nir_intrinsic_base(intr) * num_components + i;
+ compile_assert(ctx, n < arr->length);
+ if (value[i])
+ ir3_create_array_store(ctx, arr, n, value[i], addr);
+ }
+
+ break;
+ }
+
case nir_intrinsic_load_uniform:
idx = nir_intrinsic_base(intr);
if (nir_src_is_const(intr->src[0])) {
ctx->so->shared_size = ctx->s->info.shared_size;
/* NOTE: need to do something more clever when we support >1 fxn */
- nir_foreach_register (reg, &fxn->registers) {
- ir3_declare_array(ctx, reg);
+ nir_foreach_reg_decl (decl, fxn) {
+ ir3_declare_array(ctx, decl);
}
if (ctx->so->type == MESA_SHADER_TESS_CTRL &&
#include "ir3_image.h"
#include "ir3_nir.h"
#include "ir3_shader.h"
+#include "nir.h"
+#include "nir_intrinsics_indices.h"
struct ir3_context *
ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
*/
bool progress = false;
bool needs_late_alg = false;
- NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
+ NIR_PASS(progress, ctx->s, nir_lower_locals_to_reg_intrinsics, 1);
- /* we could need cleanup after lower_locals_to_regs */
+ /* we could need cleanup after lower_locals_to_reg_intrinsics */
while (progress) {
progress = false;
NIR_PASS(progress, ctx->s, nir_opt_algebraic);
}
/* We want to lower nir_op_imul as late as possible, to catch also
- * those generated by earlier passes (e.g, nir_lower_locals_to_regs).
- * However, we want a final swing of a few passes to have a chance
- * at optimizing the result.
 + * those generated by earlier passes (e.g.,
+ * nir_lower_locals_to_reg_intrinsics). However, we want a final swing of a
+ * few passes to have a chance at optimizing the result.
*/
progress = false;
NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
struct ir3_instruction **
ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
{
- struct ir3_instruction **value;
+ assert(dst->is_ssa);
+ struct ir3_instruction **value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
- if (dst->is_ssa) {
- value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
- } else {
- value = ralloc_array(ctx, struct ir3_instruction *, n);
- }
-
- /* NOTE: in non-ssa case, we don't really need to store last_dst
- * but this helps us catch cases where put_dst() call is forgotten
- */
compile_assert(ctx, !ctx->last_dst);
ctx->last_dst = value;
ctx->last_dst_n = n;
struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
- if (src->is_ssa) {
- struct hash_entry *entry;
- entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
- compile_assert(ctx, entry);
- return entry->data;
- } else {
- nir_register *reg = src->reg.reg;
- struct ir3_array *arr = ir3_get_array(ctx, reg);
- unsigned num_components = arr->r->num_components;
- struct ir3_instruction *addr = NULL;
- struct ir3_instruction **value =
- ralloc_array(ctx, struct ir3_instruction *, num_components);
-
- if (src->reg.indirect)
- addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
- reg->num_components);
-
- for (unsigned i = 0; i < num_components; i++) {
- unsigned n = src->reg.base_offset * reg->num_components + i;
- compile_assert(ctx, n < arr->length);
- value[i] = ir3_create_array_load(ctx, arr, n, addr);
- }
-
- return value;
- }
+ assert(src->is_ssa);
+ struct hash_entry *entry;
+ entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
+ compile_assert(ctx, entry);
+ return entry->data;
}
void
}
}
- if (!dst->is_ssa) {
- nir_register *reg = dst->reg.reg;
- struct ir3_array *arr = ir3_get_array(ctx, reg);
- unsigned num_components = ctx->last_dst_n;
- struct ir3_instruction *addr = NULL;
-
- if (dst->reg.indirect)
- addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
- reg->num_components);
-
- for (unsigned i = 0; i < num_components; i++) {
- unsigned n = dst->reg.base_offset * reg->num_components + i;
- compile_assert(ctx, n < arr->length);
- if (!ctx->last_dst[i])
- continue;
- ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
- }
-
- ralloc_free(ctx->last_dst);
- }
-
+ assert(dst->is_ssa);
ctx->last_dst = NULL;
ctx->last_dst_n = 0;
}
*/
void
-ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
+ir3_declare_array(struct ir3_context *ctx, nir_intrinsic_instr *decl)
{
struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
arr->id = ++ctx->num_arrays;
* It would be nice if there was a nir pass to convert arrays of
* length 1 to ssa.
*/
- arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
+ arr->length = nir_intrinsic_num_components(decl) *
+ MAX2(1, nir_intrinsic_num_array_elems(decl));
+
compile_assert(ctx, arr->length > 0);
- arr->r = reg;
- arr->half = ir3_bitsize(ctx, reg->bit_size) <= 16;
+ arr->r = &decl->dest.ssa;
+ arr->half = ir3_bitsize(ctx, nir_intrinsic_bit_size(decl)) <= 16;
list_addtail(&arr->node, &ctx->ir->array_list);
}
struct ir3_array *
-ir3_get_array(struct ir3_context *ctx, nir_register *reg)
+ir3_get_array(struct ir3_context *ctx, nir_ssa_def *reg)
{
foreach_array (arr, &ctx->ir->array_list) {
if (arr->r == reg)
struct ir3_register *dst;
unsigned flags = 0;
- /* if not relative store, don't create an extra mov, since that
- * ends up being difficult for cp to remove.
- *
- * Also, don't skip the mov if the src is meta (like fanout/split),
- * since that creates a situation that RA can't really handle properly.
- */
- if (!address && !is_meta(src)) {
- dst = src->dsts[0];
-
- src->barrier_class |= IR3_BARRIER_ARRAY_W;
- src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
-
- dst->flags |= IR3_REG_ARRAY;
- dst->size = arr->length;
- dst->array.id = arr->id;
- dst->array.offset = n;
- dst->array.base = INVALID_REG;
-
- if (arr->last_write && arr->last_write->instr->block == src->block)
- ir3_reg_set_last_array(src, dst, arr->last_write);
-
- arr->last_write = dst;
-
- array_insert(block, block->keeps, src);
-
- return;
- }
-
mov = ir3_instr_create(block, OPC_MOV, 1, 1);
if (arr->half) {
mov->cat1.src_type = TYPE_U16;