gallivm: fix nir AOS swizzling issues

author Brian Paul <brianp@vmware.com>

Tue, 17 May 2022 03:29:12 +0000 (21:29 -0600)

committer Marge Bot <emma+marge@anholt.net>

Wed, 21 Sep 2022 21:55:11 +0000 (21:55 +0000)
author Brian Paul <brianp@vmware.com>
Tue, 17 May 2022 03:29:12 +0000 (21:29 -0600)
committer Marge Bot <emma+marge@anholt.net>
Wed, 21 Sep 2022 21:55:11 +0000 (21:55 +0000)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c

index 339f955..8d71fb3 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -325,7 +325,7 @@ icmp32(struct lp_build_nir_context *bld_base,
  
  /**
   * Get a source register value for an ALU instruction.
- * This is where swizzled are handled.  There should be no negation
+ * This is where swizzles are handled.  There should be no negation
   * or absolute value modifiers.
   * num_components indicates the number of components needed in the
   * returned array or vector.
@@ -335,25 +335,63 @@ get_alu_src(struct lp_build_nir_context *bld_base,
              nir_alu_src src,
              unsigned num_components)
  {
+   assert(!src.negate);
+   assert(!src.abs);
+   assert(num_components >= 1);
+   assert(num_components <= 4);
+
     struct gallivm_state *gallivm = bld_base->base.gallivm;
     LLVMBuilderRef builder = gallivm->builder;
+   const unsigned src_components = nir_src_num_components(src.src);
+   assert(src_components > 0);
     LLVMValueRef value = get_src(bld_base, src.src);
-   bool need_swizzle = false;
-
     assert(value);
  
-   if (is_aos(bld_base))
-      return value;
-
-   unsigned src_components = nir_src_num_components(src.src);
-   for (unsigned i = 0; i < num_components; ++i) {
-      assert(src.swizzle[i] < src_components);
-      if (src.swizzle[i] != i)
+   /* check if swizzling needed for the src vector */
+   bool need_swizzle = false;
+   for (unsigned i = 0; i < src_components; ++i) {
+      if (src.swizzle[i] != i) {
           need_swizzle = true;
+         break;
+      }
+   }
+
+   if (is_aos(bld_base) && !need_swizzle) {
+      return value;
     }
  
     if (need_swizzle || num_components != src_components) {
-      if (src_components > 1 && num_components == 1) {
+      if (is_aos(bld_base) && need_swizzle) {
+         // Handle swizzle for AOS
+         assert(LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind);
+
+         // swizzle vector of ((r,g,b,a), (r,g,b,a), (r,g,b,a), (r,g,b,a))
+         assert(bld_base->base.type.width == 8);
+         assert(bld_base->base.type.length == 16);
+
+         // Do our own swizzle here since lp_build_swizzle_aos_n() does
+         // not do what we want.
+         // Ex: value = {r0,g0,b0,a0, r1,g1,b1,a1, r2,g2,b2,a2, r3,g3,b3,a3}.
+         // aos swizzle = {2,1,0,3}  // swap red/blue
+         // shuffles = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}
+         // result = {b0,g0,r0,a0, b1,g1,r1,a1, b2,g2,r2,a2, b3,g3,r3,a3}.
+         LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
+         for (unsigned i = 0; i < 16; i++) {
+            unsigned chan = i % 4;
+            /* apply src register swizzle */
+            if (chan < num_components) {
+               chan = src.swizzle[chan];
+            } else {
+               chan = src.swizzle[0];
+            }
+            /* apply aos swizzle */
+            chan = lp_nir_aos_swizzle(bld_base, chan);
+            shuffles[i] = lp_build_const_int32(gallivm, (i & ~3) + chan);
+         }
+         value = LLVMBuildShuffleVector(builder, value,
+                                        LLVMGetUndef(LLVMTypeOf(value)),
+                                        LLVMConstVector(shuffles, 16), "");
+      } else if (src_components > 1 && num_components == 1) {
           value = LLVMBuildExtractValue(gallivm->builder, value,
                                         src.swizzle[0], "");
        } else if (src_components == 1 && num_components > 1) {
@@ -369,8 +407,7 @@ get_alu_src(struct lp_build_nir_context *bld_base,
           value = arr;
        }
     }
-   assert(!src.negate);
-   assert(!src.abs);
+
     return value;
  }
  
@@ -1262,14 +1299,6 @@ visit_alu(struct lp_build_nir_context *bld_base,
                             result[0], temp_chan);
        }
     } else if (is_aos(bld_base)) {
-      if (instr->op == nir_op_fmul) {
-         if (LLVMIsConstant(src[0])) {
-            src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
-         }
-         if (LLVMIsConstant(src[1])) {
-            src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
-         }
-      }
        result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
     } else {
        /* Loop for R,G,B,A channels */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h

index d02da2b..922208d 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
@@ -358,9 +358,8 @@ get_int_bld(struct lp_build_nir_context *bld_base,
  }
  
  
-LLVMValueRef
-lp_nir_aos_conv_const(struct gallivm_state *gallivm,
-                      LLVMValueRef constval, int nc);
+unsigned
+lp_nir_aos_swizzle(struct lp_build_nir_context *bld_base, unsigned chan);
  
  
  #endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c

index e4a332a..36a2304 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c
@@ -90,36 +90,6 @@ swizzle_aos(struct lp_build_nir_context *bld_base,
  }
  
  
-LLVMValueRef
-lp_nir_aos_conv_const(struct gallivm_state *gallivm,
-                      LLVMValueRef constval, int nc)
-{
-   LLVMValueRef elems[16];
-   uint8_t val = 0;
-   /* convert from 1..4 x f32 to 16 x unorm8 */
-   for (unsigned i = 0; i < nc; i++) {
-      LLVMValueRef value =
-         LLVMBuildExtractElement(gallivm->builder, constval,
-                                 lp_build_const_int32(gallivm, i), "");
-      assert(LLVMIsConstant(value));
-      unsigned uval = LLVMConstIntGetZExtValue(value);
-      float f = uif(uval);
-      val = float_to_ubyte(f);
-      for (unsigned j = 0; j < 4; j++) {
-         elems[j * 4 + i] =
-            LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0);
-      }
-   }
-   for (unsigned i = nc; i < 4; i++) {
-      for (unsigned j = 0; j < 4; j++) {
-         elems[j * 4 + i] =
-            LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), val, 0);
-      }
-   }
-   return LLVMConstVector(elems, 16);
-}
-
-
  static void
  init_var_slots(struct lp_build_nir_context *bld_base,
                 nir_variable *var)
@@ -183,10 +153,6 @@ emit_store_var(struct lp_build_nir_context *bld_base,
     struct gallivm_state *gallivm = bld_base->base.gallivm;
     unsigned location = var->data.driver_location;
  
-   if (LLVMIsConstant(vals)) {
-      vals = lp_nir_aos_conv_const(gallivm, vals, num_components);
-   }
-
     if (deref_mode == nir_var_shader_out) {
        LLVMBuildStore(gallivm->builder, vals, bld->outputs[location]);
     }
@@ -205,6 +171,37 @@ emit_load_reg(struct lp_build_nir_context *bld_base,
  }
  
  
+unsigned
+lp_nir_aos_swizzle(struct lp_build_nir_context *bld_base, unsigned chan)
+{
+   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
+   return bld->swizzles[chan];
+}
+
+
+/*
+ * If an instruction has a writemask like r0.x = foo and the
+ * AOS/linear context uses swizzle={2,1,0,3} we need to change
+ * the writemask to r0.z
+ */
+static unsigned
+swizzle_writemask(struct lp_build_nir_aos_context *bld,
+                  unsigned writemask)
+{
+   assert(writemask != 0x0);
+   assert(writemask != 0xf);
+
+   // Ex: swap r/b channels
+   unsigned new_writemask = 0;
+   for (unsigned chan = 0; chan < 4; chan++) {
+      if (writemask & (1 << chan)) {
+         new_writemask |= 1 << bld->swizzles[chan];
+      }
+   }
+   return new_writemask;
+}
+
+
  static void
  emit_store_reg(struct lp_build_nir_context *bld_base,
                 struct lp_build_context *reg_bld,
@@ -214,17 +211,18 @@ emit_store_reg(struct lp_build_nir_context *bld_base,
                 LLVMValueRef reg_storage,
                 LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
  {
+   struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
     struct gallivm_state *gallivm = bld_base->base.gallivm;
  
-   if (LLVMIsConstant(vals[0]))
-      vals[0] = lp_nir_aos_conv_const(gallivm, vals[0], 1);
-
     if (writemask == 0xf) {
        LLVMBuildStore(gallivm->builder, vals[0], reg_storage);
        return;
     }
  
-   LLVMValueRef cur = LLVMBuildLoad2(gallivm->builder, reg_bld->vec_type, reg_storage, "");
+   writemask = swizzle_writemask(bld, writemask);
+
+   LLVMValueRef cur = LLVMBuildLoad2(gallivm->builder, reg_bld->vec_type,
+                                     reg_storage, "");
     LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
     LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
     for (unsigned j = 0; j < 16; j++) {
@@ -325,21 +323,31 @@ emit_load_const(struct lp_build_nir_context *bld_base,
                  LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
  {
     struct lp_build_nir_aos_context *bld = lp_nir_aos_context(bld_base);
-   struct gallivm_state *gallivm = bld_base->base.gallivm;
-   LLVMValueRef elems[4];
+   LLVMValueRef elems[16];
     const int nc = instr->def.num_components;
     bool do_swizzle = false;
  
     if (nc == 4)
        do_swizzle = true;
  
-   for (unsigned i = 0; i < nc; i++) {
-      int idx = do_swizzle ? bld->swizzles[i] : i;
-      elems[idx] = LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
-                                instr->value[i].u32,
-                                bld_base->base.type.sign ? 1 : 0);
+   /* The constant is something like {float, float, float, float}.
+    * We need to convert the float values from [0,1] to ubyte in [0,255].
+    * We previously checked for values outside [0,1] in
+    * llvmpipe_nir_fn_is_linear_compat().
+    * Also, we convert the (typically) 4-element float constant into a
+    * swizzled 16-element ubyte constant (z,y,x,w, z,y,x,w, z,y,x,w, z,y,x,w)
+    * since that's what 'linear' mode operates on.
+    */
+   assert(bld_base->base.type.length <= ARRAY_SIZE(elems));
+   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
+      const unsigned j = do_swizzle ? bld->swizzles[i % nc] : i % nc;
+      assert(instr->value[j].f32 >= 0.0f);
+      assert(instr->value[j].f32 <= 1.0f);
+      const unsigned u8val = float_to_ubyte(instr->value[j].f32);
+      elems[i] = LLVMConstInt(bld_base->uint_bld.int_elem_type, u8val, 0);
     }
-   outval[0] = LLVMConstVector(elems, nc);
+   outval[0] = LLVMConstVector(elems, bld_base->base.type.length);
+   outval[1] = outval[2] = outval[3] = NULL;
  }
  
  
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c b/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c

index 302d4a4..ad24053 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c
@@ -285,6 +285,25 @@ get_texcoord_provenance(const nir_tex_src *texcoord,
  
  
  /*
+ * Check if all the values of a nir_load_const_instr are 32-bit
+ * floats in the range [0,1].  If so, return true, else return false.
+ */
+static bool
+check_load_const_in_zero_one(const nir_load_const_instr *load)
+{
+   if (load->def.bit_size != 32)
+      return false;
+   for (unsigned c = 0; c < load->def.num_components; c++) {
+      float val = load->value[c].f32;
+      if (val < 0.0 || val > 1.0 || isnan(val)) {
+         return false;
+      }
+   }
+   return true;
+}
+
+
+/*
   * Examine the NIR shader to determine if it's "linear".
   */
  static bool
@@ -296,8 +315,14 @@ llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader,
        nir_foreach_instr_safe(instr, block) {
           switch (instr->type) {
           case nir_instr_type_deref:
-         case nir_instr_type_load_const:
              break;
+         case nir_instr_type_load_const: {
+            nir_load_const_instr *load = nir_instr_as_load_const(instr);
+            if (!check_load_const_in_zero_one(load)) {
+               return false;
+            }
+            break;
+         }
           case nir_instr_type_intrinsic: {
              nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
              if (intrin->intrinsic != nir_intrinsic_load_deref &&
@@ -384,14 +409,8 @@ llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader,
                    if (nir_src_is_const(alu->src[s].src)) {
                       nir_load_const_instr *load =
                          nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr);
-
-                     if (load->def.bit_size != 32)
+                     if (!check_load_const_in_zero_one(load)) {
                          return false;
-                     for (unsigned c = 0; c < load->def.num_components; c++) {
-                        if (load->value[c].f32 < 0.0 || load->value[c].f32 > 1.0) {
-                           info->unclamped_immediates = true;
-                           return false;
-                        }
                       }
                    }
                 }
@@ -437,7 +456,6 @@ llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)
         shader->info.base.num_outputs == 1 &&
         !shader->info.indirect_textures &&
         !shader->info.sampler_texture_units_different &&
-       !shader->info.unclamped_immediates &&
         shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
         llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info)) {
        shader->kind = LP_FS_KIND_LLVM_LINEAR;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c b/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c

index 41bbe84..3f79475 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c
@@ -279,7 +279,7 @@ llvmpipe_fs_variant_linear_llvm(struct llvmpipe_context *lp,
      */
  
     char func_name[256];
-   snprintf(func_name, sizeof(func_name), "fs_variant_linear");
+   snprintf(func_name, sizeof(func_name), "fs_variant_linear2");
  
     LLVMTypeRef ret_type = pint8t;
     LLVMTypeRef arg_types[4];
author	Brian Paul <brianp@vmware.com>
	Tue, 17 May 2022 03:29:12 +0000 (21:29 -0600)
committer	Marge Bot <emma+marge@anholt.net>
	Wed, 21 Sep 2022 21:55:11 +0000 (21:55 +0000)
src/gallium/auxiliary/gallivm/lp_bld_nir.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_nir.h		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_nir_aos.c		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_state_fs_linear_llvm.c		patch \| blob \| history