From fcd2ef23e5f1d50008166168e772815c0213e37c Mon Sep 17 00:00:00 2001
From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= <daniel@schuermann.dev>
Date: Mon, 31 Aug 2020 10:55:51 +0100
Subject: [PATCH] radv: vectorize 16bit instructions

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6680>
---
 docs/relnotes/new_features.txt |  1 +
 src/amd/vulkan/radv_pipeline.c | 35 +++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_shader.c   |  1 +
 3 files changed, 37 insertions(+)

diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt
index f129904..b6bdd13 100644
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -12,3 +12,4 @@ Classic swrast dri driver removed in favor of gallium swrast (llvmpipe or softpi
 Panfrost g31/g52/g72 exposes ES 3.0
 Panfrost t760+ exposes GL 3.1 (including on Bifrost)
 Sparse memory support on RADV
+Rapid packed math (16bit-vectorization) on RADV
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index fbdd9c9..a910cfc 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3157,6 +3157,39 @@ lower_bit_size_callback(const nir_instr *instr, void *_)
 	return 0;
 }
 
+static bool
+opt_vectorize_callback(const nir_instr *instr, void *_)
+{
+   assert(instr->type == nir_instr_type_alu);
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   unsigned bit_size = alu->dest.dest.ssa.bit_size;
+   if (bit_size != 16) /* only 16-bit results are vectorization candidates */
+      return false;
+
+   switch (alu->op) {
+   case nir_op_fadd:
+   case nir_op_fsub:
+   case nir_op_fmul:
+   case nir_op_fneg:
+   case nir_op_fsat:
+   case nir_op_fmin:
+   case nir_op_fmax:
+   case nir_op_iadd:
+   case nir_op_isub:
+   case nir_op_imul:
+   case nir_op_imin:
+   case nir_op_imax:
+   case nir_op_umin:
+   case nir_op_umax:
+      return true;
+   case nir_op_ishl: /* TODO: in NIR, these have 32bit shift operands */
+   case nir_op_ishr: /* while Radeon needs 16bit operands when vectorized */
+   case nir_op_ushr:
+   default:
+      return false;
+   }
+}
+
 VkResult radv_create_shaders(struct radv_pipeline *pipeline,
                              struct radv_device *device,
                              struct radv_pipeline_cache *cache,
@@ -3373,6 +3406,8 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
 
 				if (device->physical_device->rad_info.chip_class >= GFX8)
 					nir_opt_remove_phis(nir[i]); /* cleanup LCSSA phis */
+				if (device->physical_device->rad_info.chip_class >= GFX9)
+					NIR_PASS_V(nir[i], nir_opt_vectorize, opt_vectorize_callback, NULL);
 			}
 
 			/* cleanup passes */
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 653bb0b..8469138 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -84,6 +84,7 @@ static const struct nir_shader_compiler_options nir_options = {
 	.use_scoped_barrier = true,
 	.max_unroll_iterations = 32,
 	.use_interpolated_input_intrinsics = true,
+	.vectorize_vec2_16bit = true,
 	/* nir_lower_int64() isn't actually called for the LLVM backend, but
 	 * this helps the loop unrolling heuristics. */
 	.lower_int64_options = nir_lower_imul64 |
-- 
2.7.4