From a8e740c3547b0209d04b505d95a79794db31ce0b Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Wed, 30 Nov 2016 09:40:43 +0100
Subject: [PATCH] nir/lower_tex: add lowering for texture gradient on cube maps

This is ported from the Intel lowering pass that we use with GLSL IR.
The NIR pass only handles cube maps, not shadow samplers, which are
also lowered for gen < 8 on Intel hardware. We will add support for
that in a later patch, at which point we should be able to remove
the GLSL IR lowering pass.

v2:
- added a helper to retrieve ddx/ddy parameters (Ken)
- No need to make size.z=1.0, we are only using component x anyway (Iago)

v3:
- Get rid of the ddx/ddy helper and use nir_tex_instr_src_index
  instead (Ken, Eric)

v4:
- When emitting the textureLod operation, copy all texture parameters
  from the original textureGrad() (except for ddx/ddy) using a loop
- Add a 'continue' statement if the lowering makes progress because it
  replaces the original texture instruction

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v3)
---
 src/compiler/nir/nir.h           |   5 +
 src/compiler/nir/nir_lower_tex.c | 208 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 213 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3040cbd..826410d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2394,6 +2394,11 @@ typedef struct nir_lower_tex_options {
     * of the texture are lowered to linear.
     */
    unsigned lower_srgb;
+
+   /**
+    * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
+    */
+   bool lower_txd_cube_map;
 } nir_lower_tex_options;
 
 bool nir_lower_tex(nir_shader *shader,
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 08cb668..11773cb 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -304,6 +304,207 @@ lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex)
                       nir_channel(b, xuxv, 3));
 }
 
+/*
+ * Emits a textureLod (txl) instruction that replaces an existing textureGrad
+ * (txd) instruction: every source of tex except ddx/ddy is copied and lod is
+ * appended as the lod source. Uses of tex's result are rewritten to the new
+ * instruction's result and tex is removed.
+ */
+static void
+replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
+{
+   /* ddx and ddy (two sources) are dropped and lod (one source) is added. */
+   int num_srcs = tex->num_srcs - 1;
+   nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs);
+
+   txl->op = nir_texop_txl;
+   txl->sampler_dim = tex->sampler_dim;
+   txl->texture_index = tex->texture_index;
+   txl->dest_type = tex->dest_type;
+   txl->is_array = tex->is_array;
+   txl->is_shadow = tex->is_shadow;
+   txl->is_new_style_shadow = tex->is_new_style_shadow;
+   txl->sampler_index = tex->sampler_index;
+   txl->texture = (nir_deref_var *)
+      nir_copy_deref(txl, &tex->texture->deref);
+   txl->sampler = (nir_deref_var *)
+      nir_copy_deref(txl, &tex->sampler->deref);
+   txl->coord_components = tex->coord_components;
+
+   int src_num = 0;
+   for (int i = 0; i < tex->num_srcs; i++) {
+      if (tex->src[i].src_type == nir_tex_src_ddx ||
+          tex->src[i].src_type == nir_tex_src_ddy)
+         continue;
+      nir_src_copy(&txl->src[src_num].src, &tex->src[i].src, txl);
+      txl->src[src_num].src_type = tex->src[i].src_type;
+      src_num++;
+   }
+
+   txl->src[src_num].src = nir_src_for_ssa(lod);
+   txl->src[src_num].src_type = nir_tex_src_lod;
+   src_num++;
+
+   /* Holds only if exactly two ddx/ddy sources were skipped above. */
+   assert(src_num == num_srcs);
+
+   /* Initialize the destination once, right before inserting txl. */
+   nir_ssa_dest_init(&txl->instr, &txl->dest, 4, 32, NULL);
+   nir_builder_instr_insert(b, &txl->instr);
+
+   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(&txl->dest.ssa));
+
+   nir_instr_remove(&tex->instr);
+}
+
+static void
+lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
+{
+   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
+   assert(tex->op == nir_texop_txd);
+   assert(tex->dest.is_ssa);
+
+   /* Use textureSize() to get the width and height of LOD 0 */
+   nir_ssa_def *size = get_texture_size(b, tex);
+
+   /* Lower textureGrad on a cube map to textureLod: compute an LOD from the
+    * explicit gradients (ddx/ddy) and the size of LOD 0, then replace the
+    * instruction with replace_gradient_with_lod().
+    *
+    * Cubemap texture lookups first generate a texture coordinate normalized
+    * to [-1, 1] on the appropriate face. The appropriate face is determined
+    * by which component has largest magnitude and its sign. The texture
+    * coordinate is the quotient of the remaining texture coordinates against
+    * the absolute value of the component of largest magnitude. Because of
+    * this division, the derivative of the texel coordinate must be computed
+    * with the quotient rule. The high level GLSL code is as follows:
+    *
+    * Step 1: selection
+    * vec3 abs_p, Q, dQdx, dQdy;
+    * abs_p = abs(ir->coordinate);
+    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
+    *    Q = ir->coordinate.yzx;
+    *    dQdx = ir->lod_info.grad.dPdx.yzx;
+    *    dQdy = ir->lod_info.grad.dPdy.yzx;
+    * }
+    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
+    *    Q = ir->coordinate.xzy;
+    *    dQdx = ir->lod_info.grad.dPdx.xzy;
+    *    dQdy = ir->lod_info.grad.dPdy.xzy;
+    * }
+    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
+    *    Q = ir->coordinate;
+    *    dQdx = ir->lod_info.grad.dPdx;
+    *    dQdy = ir->lod_info.grad.dPdy;
+    * }
+    *
+    * Step 2: use quotient rule to compute derivative. The texel coordinate
+    * normalized to [-1, 1] is given by Q.xy / (sign(Q.z) * Q.z). We are
+    * only concerned with the magnitudes of the derivatives whose values are
+    * not affected by the sign. We drop the sign from the computation.
+    * vec2 dx, dy;
+    * float recip;
+    *
+    * recip = 1.0 / Q.z;
+    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
+    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
+    *
+    * Step 3: compute LOD. At this point we have the derivatives of the
+    * texture coordinates normalized to [-1,1]. We take the LOD to be
+    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
+    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
+    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
+    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
+    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
+    * where L is the dimension of the cubemap. The code is:
+    * float M, result;
+    * M = max(dot(dx, dx), dot(dy, dy));
+    * L = textureSize(sampler, 0).x;
+    * result = -1.0 + 0.5 * log2(L * L * M);
+    */
+
+   /* coordinate */
+   nir_ssa_def *p =
+      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;
+
+   /* unmodified dPdx, dPdy values */
+   nir_ssa_def *dPdx =
+      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
+   nir_ssa_def *dPdy =
+      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
+
+   nir_ssa_def *abs_p = nir_fabs(b, p);
+   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
+   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
+   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);
+
+   /* 1. selection: cond_z is tested first, then cond_y, otherwise yzx */
+   nir_ssa_def *Q, *dQdx, *dQdy;
+
+   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
+   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
+
+   unsigned yzx[4] = { 1, 2, 0, 0 };
+   unsigned xzy[4] = { 0, 2, 1, 0 };
+
+   Q = nir_bcsel(b, cond_z,
+                 p,
+                 nir_bcsel(b, cond_y,
+                           nir_swizzle(b, p, xzy, 3, false),
+                           nir_swizzle(b, p, yzx, 3, false)));
+
+   dQdx = nir_bcsel(b, cond_z,
+                    dPdx,
+                    nir_bcsel(b, cond_y,
+                              nir_swizzle(b, dPdx, xzy, 3, false),
+                              nir_swizzle(b, dPdx, yzx, 3, false)));
+
+   dQdy = nir_bcsel(b, cond_z,
+                    dPdy,
+                    nir_bcsel(b, cond_y,
+                              nir_swizzle(b, dPdy, xzy, 3, false),
+                              nir_swizzle(b, dPdy, yzx, 3, false)));
+
+   /* 2. quotient rule */
+
+   /* tmp = Q.xy * recip;
+    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
+    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
+    */
+   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
+
+   unsigned xy[4] = { 0, 1, 0, 0 };
+   nir_ssa_def *Q_xy = nir_swizzle(b, Q, xy, 2, false);
+   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
+
+   nir_ssa_def *dQdx_xy = nir_swizzle(b, dQdx, xy, 2, false);
+   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
+   nir_ssa_def *dx =
+      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
+
+   nir_ssa_def *dQdy_xy = nir_swizzle(b, dQdy, xy, 2, false);
+   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
+   nir_ssa_def *dy =
+      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
+
+   /* 3. compute LOD: M = max(dot(dx, dx), dot(dy, dy)); */
+   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
+
+   /* size has textureSize() of LOD 0 */
+   nir_ssa_def *L = nir_channel(b, size, 0);
+
+   /* lod = -1.0 + 0.5 * log2(L * L * M); */
+   nir_ssa_def *lod =
+      nir_fadd(b,
+               nir_imm_float(b, -1.0f),
+               nir_fmul(b,
+                        nir_imm_float(b, 0.5f),
+                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
+
+   /* Finally, replace the gradient instruction with an equivalent lod one */
+   replace_gradient_with_lod(b, lod, tex);
+}
+
 static void
 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
 {
@@ -529,6 +730,13 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
          linearize_srgb_result(b, tex);
          progress = true;
       }
+
+      if (tex->op == nir_texop_txd && options->lower_txd_cube_map &&
+          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+         lower_gradient_cube_map(b, tex);
+         progress = true;
+         continue;
+      }
    }
 
    return progress;
-- 
2.7.4