ir3/a7xx: Don't multiply global mem instruction's offset by 4
author: Danylo Piliaiev <dpiliaiev@igalia.com>
Wed, 19 Apr 2023 17:08:00 +0000 (19:08 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Tue, 5 Sep 2023 16:19:29 +0000 (16:19 +0000)
a7xx global memory instructions don't have an implied shift, so the compiler now shifts the offset left by 2 explicitly when targeting a7xx.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23217>

src/freedreno/ir3/ir3_a6xx.c
src/freedreno/ir3/ir3_parser.y
src/freedreno/isa/ir3-cat6.xml

index 6ed32dc..1592a56 100644 (file)
@@ -347,6 +347,10 @@ emit_intrinsic_load_global_ir3(struct ir3_context *ctx,
                      0, create_immed(b, dest_components), 0);
    } else {
       offset = ir3_get_src(ctx, &intr->src[1])[0];
+      if (ctx->compiler->gen >= 7) {
+         /* A7XX TODO: Move to NIR for it to be properly optimized? */
+         offset = ir3_SHL_B(b, offset, 0, create_immed(b, 2), 0);
+      }
       load =
          ir3_LDG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
                    create_immed(b, 0), 0, create_immed(b, dest_components), 0);
@@ -387,6 +391,10 @@ emit_intrinsic_store_global_ir3(struct ir3_context *ctx,
                     create_immed(b, ncomp), 0);
    } else {
       offset = ir3_get_src(ctx, &intr->src[2])[0];
+      if (ctx->compiler->gen >= 7) {
+         /* A7XX TODO: Move to NIR for it to be properly optimized? */
+         offset = ir3_SHL_B(b, offset, 0, create_immed(b, 2), 0);
+      }
       stg =
          ir3_STG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
                    create_immed(b, 0), 0, value, 0, create_immed(b, ncomp), 0);
index 431995c..f975ead 100644 (file)
@@ -1138,7 +1138,10 @@ cat6_a6xx_global_address_pt3:
                         new_src(0, IR3_REG_IMMED)->uim_val = $3 - 2;
                         new_src(0, IR3_REG_IMMED)->uim_val = $4;
                    }
-|                  '+' cat6_reg_or_immed
+|                  '+' cat6_reg_or_immed {
+                        // Dummy src to smooth the difference between a6xx and a7xx
+                        new_src(0, IR3_REG_IMMED)->uim_val = 0;
+                   }
 
 cat6_a6xx_global_address_pt2:
                    '(' src offset ')' '<' '<' integer {
index 4f4a6ee..e9b75a7 100644 (file)
@@ -144,7 +144,7 @@ SOFTWARE.
        <encode>
                <map name="SRC2">src->srcs[1]</map>
                <map name="OFF">extract_reg_uim(src->srcs[2])</map>
-               <map name="SIZE">extract_reg_uim(src->srcs[3])</map>
+               <map name="SIZE">extract_reg_uim(src->srcs[4])</map>
                <map name="SRC1_CONST">!!(src->srcs[0]->flags &amp; IR3_REG_CONST)</map>
        </encode>
 </bitset>
@@ -256,8 +256,8 @@ SOFTWARE.
        <encode>
                <map name="SRC2">src->srcs[1]</map>
                <map name="OFF">extract_reg_uim(src->srcs[2])</map>
-               <map name="SRC3">src->srcs[3]</map>
-               <map name="SIZE">extract_reg_uim(src->srcs[4])</map>
+               <map name="SRC3">src->srcs[4]</map>
+               <map name="SIZE">extract_reg_uim(src->srcs[5])</map>
                <map name="SRC1_CONST">!!(src->srcs[0]->flags &amp; IR3_REG_CONST)</map>
        </encode>
 </bitset>