From 24669a672f331a10152f9a4d7deb6126a1106211 Mon Sep 17 00:00:00 2001
From: Jesse Natalie <jenatali@microsoft.com>
Date: Thu, 13 Aug 2020 17:21:18 -0700
Subject: [PATCH] nir: Add a printf lowering pass (v5)

This pass creates a SSBO var for the printf buffer. It does an atomic increment
at the beginning of the buffer to determine where to write, then dumps
the args after that.

v2: [airlied]
Enhanced to use an index into a set of format info that is passed
back to the caller. The format info contains the number of args,
argument sizes and the format string.

v3: move format string lowering to vtn

v4: Jason reworked it.

v5: assume buffer has initial offset prebaked in and work from there.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8254>
---
 src/compiler/Makefile.sources       |   1 +
 src/compiler/nir/meson.build        |   1 +
 src/compiler/nir/nir.h              |   7 ++
 src/compiler/nir/nir_intrinsics.py  |   4 +
 src/compiler/nir/nir_lower_printf.c | 143 ++++++++++++++++++++++++++++++++++++
 5 files changed, 156 insertions(+)
 create mode 100644 src/compiler/nir/nir_lower_printf.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 2f3d389..3c533e2 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -289,6 +289,7 @@ NIR_FILES = \
 	nir/nir_lower_phis_to_scalar.c \
 	nir/nir_lower_point_size.c \
 	nir/nir_lower_point_size_mov.c \
+	nir/nir_lower_printf.c \
 	nir/nir_lower_regs_to_ssa.c \
 	nir/nir_lower_returns.c \
 	nir/nir_lower_samplers.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 6895a0f..9c90eaa 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -177,6 +177,7 @@ files_libnir = files(
   'nir_lower_phis_to_scalar.c',
   'nir_lower_point_size.c',
   'nir_lower_point_size_mov.c',
+  'nir_lower_printf.c',
   'nir_lower_regs_to_ssa.c',
   'nir_lower_returns.c',
   'nir_lower_samplers.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 33732db..ad75665 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4878,6 +4878,13 @@ bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);
 bool nir_lower_samplers(nir_shader *shader);
 bool nir_lower_ssbo(nir_shader *shader);
 
+typedef struct nir_lower_printf_options {
+   bool treat_doubles_as_floats : 1;
+   unsigned max_buffer_size;
+} nir_lower_printf_options;
+
+bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);
+
 /* This is here for unit tests. */
 bool nir_opt_comparison_pre_impl(nir_function_impl *impl);
 
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 9406fcb..e9912d1 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -957,6 +957,10 @@ intrinsic("load_frag_shading_rate", dest_comp=1, bit_sizes=[32],
 # Second source is a deref to a struct containing the args
 # Dest is success or failure
 intrinsic("printf", src_comp=[1, 1], dest_comp=1, bit_sizes=[32])
+# Since most drivers will want to lower to just dumping args
+# in a buffer, nir_lower_printf will do that, but requires
+# the driver to at least provide a base location
+system_value("printf_buffer_address", 1, bit_sizes=[32,64])
 
 # IR3-specific version of most SSBO intrinsics. The only different
 # compare to the originals is that they add an extra source to hold
diff --git a/src/compiler/nir/nir_lower_printf.c b/src/compiler/nir/nir_lower_printf.c
new file mode 100644
index 0000000..1dabe67
--- /dev/null
+++ b/src/compiler/nir/nir_lower_printf.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright Â© 2020 Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_builder_opcodes.h"
+
+#include "util/u_math.h"
+
+static bool
+lower_printf_instr(nir_builder *b, nir_instr *instr, void *_options)
+{
+   const nir_lower_printf_options *options = _options;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *prntf = nir_instr_as_intrinsic(instr);
+   if (prntf->intrinsic != nir_intrinsic_printf)
+      return false;
+
+   nir_ssa_def *fmt_str_id = prntf->src[0].ssa;
+   nir_deref_instr *args = nir_src_as_deref(prntf->src[1]);
+   assert(args->deref_type == nir_deref_type_var);
+
+   const unsigned ptr_bit_size = nir_get_ptr_bitsize(b->shader);
+
+   /* Atomic add a buffer size counter to determine where to write.  If
+    * overflowed, return -1, otherwise, store the arguments and return 0.
+    */
+   b->cursor = nir_before_instr(&prntf->instr);
+   nir_ssa_def *buffer_addr = nir_load_printf_buffer_address(b, ptr_bit_size);
+   nir_deref_instr *buffer =
+      nir_build_deref_cast(b, buffer_addr, nir_var_mem_global,
+                           glsl_array_type(glsl_uint8_t_type(), 0, 4), 0);
+
+   /* Align the struct size to 4 */
+   assert(glsl_type_is_struct_or_ifc(args->type));
+   int args_size = align(glsl_get_cl_size(args->type), 4);
+   assert(fmt_str_id->bit_size == 32);
+   int fmt_str_id_size = 4;
+
+   /* Increment the counter at the beginning of the buffer */
+   const unsigned counter_size = 4;
+   nir_deref_instr *counter = nir_build_deref_array_imm(b, buffer, 0);
+   counter = nir_build_deref_cast(b, &counter->dest.ssa,
+                                  nir_var_mem_global,
+                                  glsl_uint_type(), 0);
+   counter->cast.align_mul = 4;
+   nir_ssa_def *offset =
+      nir_deref_atomic_add(b, 32, &counter->dest.ssa,
+                           nir_imm_int(b, fmt_str_id_size + args_size));
+
+   /* Check if we're still in-bounds */
+   const unsigned default_buffer_size = 1024 * 1024;
+   unsigned buffer_size = (options && options->max_buffer_size) ?
+                          options->max_buffer_size : default_buffer_size;
+   int max_valid_offset =
+      buffer_size - args_size - fmt_str_id_size - counter_size;
+   nir_push_if(b, nir_ilt(b, offset, nir_imm_int(b, max_valid_offset)));
+
+   nir_ssa_def *printf_succ_val = nir_imm_int(b, 0);
+
+   /* Write the format string ID */
+   nir_ssa_def *fmt_str_id_offset =
+      nir_i2i(b, offset, ptr_bit_size);
+   nir_deref_instr *fmt_str_id_deref =
+      nir_build_deref_array(b, buffer, fmt_str_id_offset);
+   fmt_str_id_deref = nir_build_deref_cast(b, &fmt_str_id_deref->dest.ssa,
+                                           nir_var_mem_global,
+                                           glsl_uint_type(), 0);
+   fmt_str_id_deref->cast.align_mul = 4;
+   nir_store_deref(b, fmt_str_id_deref, fmt_str_id, ~0);
+
+   /* Write the format args */
+   for (unsigned i = 0; i < glsl_get_length(args->type); ++i) {
+      nir_deref_instr *arg_deref = nir_build_deref_struct(b, args, i);
+      nir_ssa_def *arg = nir_load_deref(b, arg_deref);
+      const struct glsl_type *arg_type = arg_deref->type;
+
+      /* Clang does promotion of arguments to their "native" size. That means
+       * that any floats have been converted to doubles for the call to
+       * printf. Since doubles are optional, some drivers might not support
+       * them. For those drivers, convert them back to float before writing.
+       * Copy prop and other optimizations should remove all hints of doubles.
+       */
+      if (glsl_get_base_type(arg_type) == GLSL_TYPE_DOUBLE &&
+          options && options->treat_doubles_as_floats) {
+         arg = nir_f2f32(b, arg);
+         arg_type = glsl_float_type();
+      }
+
+      unsigned field_offset = glsl_get_struct_field_offset(args->type, i);
+      nir_ssa_def *arg_offset =
+         nir_i2i(b, nir_iadd_imm(b, offset,
+                                 fmt_str_id_size + field_offset),
+                 ptr_bit_size);
+      nir_deref_instr *dst_arg_deref =
+         nir_build_deref_array(b, buffer, arg_offset);
+      dst_arg_deref = nir_build_deref_cast(b, &dst_arg_deref->dest.ssa,
+                                           nir_var_mem_global, arg_type, 0);
+      assert(field_offset % 4 == 0);
+      dst_arg_deref->cast.align_mul = 4;
+      nir_store_deref(b, dst_arg_deref, arg, ~0);
+   }
+
+   nir_push_else(b, NULL);
+   nir_ssa_def *printf_fail_val = nir_imm_int(b, -1);
+   nir_pop_if(b, NULL);
+
+   nir_ssa_def *ret_val = nir_if_phi(b, printf_succ_val, printf_fail_val);
+   nir_ssa_def_rewrite_uses(&prntf->dest.ssa, nir_src_for_ssa(ret_val));
+   nir_instr_remove(&prntf->instr);
+
+   return true;
+}
+
+bool
+nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options)
+{
+   return nir_shader_instructions_pass(nir, lower_printf_instr,
+                                       nir_metadata_none,
+                                       (void *)options);
+}
-- 
2.7.4