agx: Extract umul_high implementation

author Alyssa Rosenzweig <alyssa@collabora.com>

Tue, 2 Aug 2022 17:38:57 +0000 (13:38 -0400)

committer Alyssa Rosenzweig <alyssa@rosenzweig.io>

Mon, 8 Aug 2022 00:43:54 +0000 (20:43 -0400)
author Alyssa Rosenzweig <alyssa@collabora.com>
Tue, 2 Aug 2022 17:38:57 +0000 (13:38 -0400)
committer Alyssa Rosenzweig <alyssa@rosenzweig.io>
Mon, 8 Aug 2022 00:43:54 +0000 (20:43 -0400)
diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c

index 6808d29..de4af0a 100644 (file)
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -214,6 +214,33 @@ agx_emit_load_const(agx_builder *b, nir_load_const_instr *instr)
                    nir_const_value_as_uint(instr->value[0], bit_size));
  }
  
+/*
+ * Implement umul_high by emitting a multiply into a temporary one size class
+ * wider than the sources (e.g. 32x32->64-bit) and keeping only its high word.
+ */
+static agx_instr *
+agx_umul_high_to(agx_builder *b, agx_index dst, agx_index P, agx_index Q)
+{
+   assert(P.size == Q.size && "source sizes must match");
+   assert(P.size == dst.size && "dest size must match");
+   assert(P.size != AGX_SIZE_64 && "64x64 multiply should have been lowered");
+   /* "P.size + 1" must name the next wider size; verify the enum order */
+   static_assert(AGX_SIZE_64 == (AGX_SIZE_32 + 1), "enum wrong");
+   static_assert(AGX_SIZE_32 == (AGX_SIZE_16 + 1), "enum wrong");
+   /* agx_abs() on the sources presumably requests zero-extension -- confirm */
+   agx_index product = agx_temp(b->shader, P.size + 1);
+   agx_imad_to(b, product, agx_abs(P), agx_abs(Q), agx_zero(), 0);
+   return agx_p_split_to(b, agx_null(), dst, agx_null(), agx_null(), product);
+}
+/* Like agx_umul_high_to, but allocates the destination temp and returns it */
+static agx_index
+agx_umul_high(agx_builder *b, agx_index P, agx_index Q)
+{
+   agx_index dst = agx_temp(b->shader, P.size); /* temp sized like P */
+   agx_umul_high_to(b, dst, P, Q);
+   return dst;
+}
+
  /* Emit code dividing P by Q */
  static agx_index
  agx_udiv_const(agx_builder *b, agx_index P, uint32_t Q)
@@ -241,16 +268,12 @@ agx_udiv_const(agx_builder *b, agx_index P, uint32_t Q)
     agx_index increment = agx_mov_imm(b, 32, info.increment);
     agx_index postshift = agx_mov_imm(b, 32, info.post_shift);
     agx_index multiplier = agx_mov_imm(b, 32, info.multiplier);
-   agx_index multiplied = agx_temp(b->shader, AGX_SIZE_64);
     agx_index n = P;
  
     if (info.pre_shift != 0) n = agx_ushr(b, n, preshift);
     if (info.increment != 0) n = agx_iadd(b, n, increment, 0);
  
-   /* 64-bit multiplication, zero extending 32-bit x 32-bit, get the top word */
-   agx_imad_to(b, multiplied, agx_abs(n), agx_abs(multiplier), agx_zero(), 0);
-   n = agx_temp(b->shader, AGX_SIZE_32);
-   agx_p_extract_to(b, n, multiplied, 1);
+   n = agx_umul_high(b, n, multiplier);
  
     if (info.post_shift != 0) n = agx_ushr(b, n, postshift);
author	Alyssa Rosenzweig <alyssa@collabora.com>
	Tue, 2 Aug 2022 17:38:57 +0000 (13:38 -0400)
committer	Alyssa Rosenzweig <alyssa@rosenzweig.io>
	Mon, 8 Aug 2022 00:43:54 +0000 (20:43 -0400)