nir: Get rid of the array elements parameter on load/store intrinsics
authorJason Ekstrand <jason.ekstrand@intel.com>
Tue, 19 May 2015 23:57:43 +0000 (16:57 -0700)
committerJason Ekstrand <jason.ekstrand@intel.com>
Wed, 20 May 2015 16:28:06 +0000 (09:28 -0700)
Previously, we used intrinsic->const_index[1] to represent "the number of
array elements to load" for load/store intrinsics.  However, this set to 1
by every pass that ever creates a load/store intrinsic.  Also, while it
might make some sense for registers, it makes no sense whatsoever in SSA.
On top of that, the i965 backend was the only backend to ever support it;
freedreno and vc4 just assert that it's always 1.  Let's just delete it.

Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Rob Clark <robclark@freedesktop.org>
src/gallium/auxiliary/nir/tgsi_to_nir.c
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/gallium/drivers/vc4/vc4_program.c
src/glsl/nir/nir_intrinsics.h
src/glsl/nir/nir_lower_io.c
src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 50ce3dc..1702b41 100644 (file)
@@ -401,7 +401,6 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
 
       load->num_components = 4;
       load->const_index[0] = index;
-      load->const_index[1] = 1;
       if (dim) {
          if (dimind) {
             load->src[srcn] =
@@ -1671,7 +1670,6 @@ ttn_add_output_stores(struct ttn_compile *c)
             nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
          store->num_components = 4;
          store->const_index[0] = var->data.driver_location + i;
-         store->const_index[1] = 1;
          store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg;
          nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
       }
index 05e7049..2cf25ea 100644 (file)
@@ -1158,14 +1158,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
 
        switch (intr->intrinsic) {
        case nir_intrinsic_load_uniform:
-               compile_assert(ctx, intr->const_index[1] == 1);
                for (int i = 0; i < intr->num_components; i++) {
                        unsigned n = idx * 4 + i;
                        dst[i] = create_uniform(ctx, n);
                }
                break;
        case nir_intrinsic_load_uniform_indirect:
-               compile_assert(ctx, intr->const_index[1] == 1);
                src = get_src(ctx, &intr->src[0]);
                for (int i = 0; i < intr->num_components; i++) {
                        unsigned n = idx * 4 + i;
@@ -1178,14 +1176,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
                emit_intrinsic_load_ubo(ctx, intr, dst);
                break;
        case nir_intrinsic_load_input:
-               compile_assert(ctx, intr->const_index[1] == 1);
                for (int i = 0; i < intr->num_components; i++) {
                        unsigned n = idx * 4 + i;
                        dst[i] = b->inputs[n];
                }
                break;
        case nir_intrinsic_load_input_indirect:
-               compile_assert(ctx, intr->const_index[1] == 1);
                src = get_src(ctx, &intr->src[0]);
                struct ir3_instruction *collect =
                                create_collect(b, b->inputs, b->ninputs);
@@ -1202,7 +1198,6 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
                emit_intrinisic_store_var(ctx, intr);
                break;
        case nir_intrinsic_store_output:
-               compile_assert(ctx, intr->const_index[1] == 1);
                src = get_src(ctx, &intr->src[0]);
                for (int i = 0; i < intr->num_components; i++) {
                        unsigned n = idx * 4 + i;
index bf156f9..d84e5f2 100644 (file)
@@ -1849,8 +1849,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
 
         switch (instr->intrinsic) {
         case nir_intrinsic_load_uniform:
-                assert(instr->const_index[1] == 1);
-
                 for (int i = 0; i < instr->num_components; i++) {
                         dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
                                               instr->const_index[0] * 4 + i);
@@ -1858,8 +1856,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                 break;
 
         case nir_intrinsic_load_uniform_indirect:
-                assert(instr->const_index[1] == 1);
-
                 for (int i = 0; i < instr->num_components; i++) {
                         dest[i] = indirect_uniform_load(c,
                                                         ntq_get_src(c, instr->src[0], 0),
@@ -1870,8 +1866,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                 break;
 
         case nir_intrinsic_load_input:
-                assert(instr->const_index[1] == 1);
-
                 for (int i = 0; i < instr->num_components; i++)
                         dest[i] = c->inputs[instr->const_index[0] * 4 + i];
 
index 10192c5..b516830 100644 (file)
@@ -138,12 +138,11 @@ SYSTEM_VALUE(sample_mask_in, 1)
 SYSTEM_VALUE(invocation_id, 1)
 
 /*
- * The first index is the address to load from, and the second index is the
- * number of array elements to load.  Indirect loads have an additional
- * register input, which is added to the constant address to compute the
- * final address to load from.  For UBO's (and SSBO's), the first source is
- * the (possibly constant) UBO buffer index and the indirect (if it exists)
- * is the second source.
+ * The first and only index is the base address to load from.  Indirect
+ * loads have an additional register input, which is added to the constant
+ * address to compute the final address to load from.  For UBO's (and
+ * SSBO's), the first source is the (possibly constant) UBO buffer index
+ * and the indirect (if it exists) is the second source.
  *
  * For vector backends, the address is in terms of one vec4, and so each array
  * element is +4 scalar components from the previous array element. For scalar
@@ -152,9 +151,9 @@ SYSTEM_VALUE(invocation_id, 1)
  */
 
 #define LOAD(name, extra_srcs, flags) \
-   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
+   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \
    INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
-             true, 0, 0, 2, flags)
+             true, 0, 0, 1, flags)
 
 LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
@@ -172,7 +171,7 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
    INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
              num_indices, flags) \
 
-STORE(output, 2, 0)
-/* STORE(ssbo, 3, 0) */
+STORE(output, 1, 0)
+/* STORE(ssbo, 2, 0) */
 
 LAST_INTRINSIC(store_output_indirect)
index 03eed04..6761d5b 100644 (file)
@@ -288,7 +288,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
          offset += intrin->variables[0]->var->data.driver_location;
 
          load->const_index[0] = offset;
-         load->const_index[1] = 1;
 
          if (has_indirect)
             load->src[0] = indirect;
@@ -331,7 +330,6 @@ nir_lower_io_block(nir_block *block, void *void_state)
          offset += intrin->variables[0]->var->data.driver_location;
 
          store->const_index[0] = offset;
-         store->const_index[1] = 1;
 
          nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
 
index 5dd8363..56a2278 100644 (file)
@@ -1345,16 +1345,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          index -= num_direct_uniforms;
       }
 
-      for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0; j < instr->num_components; j++) {
-            fs_reg src = offset(retype(uniform_reg, dest.type), index);
-            if (has_indirect)
-               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
-            index++;
-
-            emit(MOV(dest, src));
-            dest = offset(dest, 1);
-         }
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg src = offset(retype(uniform_reg, dest.type), index);
+         if (has_indirect)
+            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
+         index++;
+
+         emit(MOV(dest, src));
+         dest = offset(dest, 1);
       }
       break;
    }
@@ -1426,17 +1424,15 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       /* fallthrough */
    case nir_intrinsic_load_input: {
       unsigned index = 0;
-      for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0; j < instr->num_components; j++) {
-            fs_reg src = offset(retype(nir_inputs, dest.type),
-                                instr->const_index[0] + index);
-            if (has_indirect)
-               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
-            index++;
-
-            emit(MOV(dest, src));
-            dest = offset(dest, 1);
-         }
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg src = offset(retype(nir_inputs, dest.type),
+                             instr->const_index[0] + index);
+         if (has_indirect)
+            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
+         index++;
+
+         emit(MOV(dest, src));
+         dest = offset(dest, 1);
       }
       break;
    }
@@ -1564,16 +1560,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_store_output: {
       fs_reg src = get_nir_src(instr->src[0]);
       unsigned index = 0;
-      for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0; j < instr->num_components; j++) {
-            fs_reg new_dest = offset(retype(nir_outputs, src.type),
-                                     instr->const_index[0] + index);
-            if (has_indirect)
-               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
-            index++;
-            emit(MOV(new_dest, src));
-            src = offset(src, 1);
-         }
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg new_dest = offset(retype(nir_outputs, src.type),
+                                  instr->const_index[0] + index);
+         if (has_indirect)
+            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
+         index++;
+         emit(MOV(new_dest, src));
+         src = offset(src, 1);
       }
       break;
    }