ir3: Use (ss) for instructions writing shared regs

author Connor Abbott <cwabbott0@gmail.com>

Fri, 17 Dec 2021 18:48:49 +0000 (19:48 +0100)

committer Marge Bot <emma+marge@anholt.net>

Fri, 7 Jan 2022 14:26:08 +0000 (14:26 +0000)
author Connor Abbott <cwabbott0@gmail.com>
Fri, 17 Dec 2021 18:48:49 +0000 (19:48 +0100)
committer Marge Bot <emma+marge@anholt.net>
Fri, 7 Jan 2022 14:26:08 +0000 (14:26 +0000)
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h

index e1d6399..932f85a 100644 (file)
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1670,6 +1670,10 @@ is_local_mem_load(struct ir3_instruction *instr)
  static inline bool
  is_ss_producer(struct ir3_instruction *instr)
  {
+   foreach_dst (dst, instr) {
+      if (dst->flags & IR3_REG_SHARED)
+         return true;
+   }
     return is_sfu(instr) || is_local_mem_load(instr);
  }
  
@@ -1687,7 +1691,13 @@ soft_ss_delay(struct ir3_instruction *instr)
      * and so on. Not quite sure where it tapers out (ie. how many warps share an
      * SFU unit). But 10 seems like a reasonable # to choose:
      */
-   return 10;
+   if (is_sfu(instr) || is_local_mem_load(instr))
+      return 10;
+
+   /* The blob adds 6 nops between shared producers and consumers, and before we
+    * used (ss) this was sufficient in most cases.
+    */
+   return 6;
  }
  
  static inline bool
diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c

index 83730b5..054f4c8 100644 (file)
--- a/src/freedreno/ir3/ir3_delay.c
+++ b/src/freedreno/ir3/ir3_delay.c
@@ -76,7 +76,7 @@ ir3_delayslots(struct ir3_instruction *assigner,
  
     /* assigner must be alu: */
     if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
-       is_mem(consumer) || (assigner->dsts[0]->flags & IR3_REG_SHARED)) {
+       is_mem(consumer)) {
        return 6;
     } else {
        /* In mergedregs mode, there is an extra 2-cycle penalty when half of
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c

index bf8906f..f46312d 100644 (file)
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -255,6 +255,11 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
        if (is_sfu(n))
           regmask_set(&state->needs_ss, n->dsts[0]);
  
+      foreach_dst (dst, n) {
+         if (dst->flags & IR3_REG_SHARED)
+            regmask_set(&state->needs_ss, dst);
+      }
+
        if (is_tex_or_prefetch(n)) {
           regmask_set(&state->needs_sy, n->dsts[0]);
           if (n->opc == OPC_META_TEX_PREFETCH)
author	Connor Abbott <cwabbott0@gmail.com>
	Fri, 17 Dec 2021 18:48:49 +0000 (19:48 +0100)
committer	Marge Bot <emma+marge@anholt.net>
	Fri, 7 Jan 2022 14:26:08 +0000 (14:26 +0000)
src/freedreno/ir3/ir3.h		patch | blob | history
src/freedreno/ir3/ir3_delay.c		patch | blob | history
src/freedreno/ir3/ir3_legalize.c		patch | blob | history