From 9f767db1265e2fffb8886d608eba9dd1220547df Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Wed, 9 Aug 2023 16:59:00 -0500
Subject: [PATCH] nv50/ir: Rework conversions for texture array indices

Currently, negative array texture indices get saturated to 0 which,
while technically in-bounds, isn't what we want for Vulkan with image
robustness or robustness2.  Vulkan requires that a negative index on a
texelFetch() count as out-of-bounds but a negative index on any other
texture operation gets clamped to 0.  (See the spec section entitled
"(u,v,w,a) to (i,j,k,l,n) Transformation And Array Layer Selection").

Instead of using CVT for TXF, we now take U32 MAX with 0xffff. Because
it's unsigned, this ensures that negative array indices clamp to 0xffff
and will be considered out-of-bounds by the hardware (there are a
maximum of 2048 array indices in an image descriptor).  For everything
other than TXF, we keep using an F32->U16 conversion but add a saturate.
This ensures that negative array indices clamp to 0 as per the Vulkan
spec.  Very large indices will clamp to 0xffff which the hardware will
clamp to the maximum array index.

This fixes 324 tests in the dEQP-VK.robustness.* group, all those for 1D
and 2D array textures

Acked-by: M Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24593>
---
 src/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 29 +++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/src/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index dfe7732..f2190a9 100644
--- a/src/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1024,9 +1024,24 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
       if (i->tex.target.isArray()) {
          LValue *layer = new_LValue(func, FILE_GPR);
          Value *src = i->getSrc(lyr);
-         const int sat = (i->op == OP_TXF) ? 1 : 0;
-         DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
-         bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
+         /* Vulkan requires that a negative index on a texelFetch() count as
+          * out-of-bounds but a negative index on any other texture operation
+          * gets clamped to 0.  (See the spec section entitled "(u,v,w,a) to
+          * (i,j,k,l,n) Transformation And Array Layer Selection").
+          *
+          * For TXF, we take a U32 MAX with 0xffff, ensuring that negative
+          * array indices clamp to 0xffff and will be considered out-of-bounds
+          * by the hardware (there are a maximum of 2048 array indices in an
+          * image descriptor).  For everything else, we use a saturating F32
+          * to U16 conversion which will clamp negative array indices to 0 and
+          * large positive indices to 0xffff.  The hardware will further clamp
+          * positive array indices to the maximum in the image descriptor.
+          */
+         if (i->op == OP_TXF) {
+            bld.mkOp2(OP_MIN, TYPE_U32, layer, src, bld.loadImm(NULL, 0xffff));
+         } else {
+            bld.mkCvt(OP_CVT, TYPE_U16, layer, TYPE_F32, src)->saturate = true;
+         }
          if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) {
             for (int s = dim; s >= 1; --s)
                i->setSrc(s, i->getSrc(s - 1));
@@ -1092,9 +1107,11 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
       }
 
       if (arrayIndex) {
-         int sat = (i->op == OP_TXF) ? 1 : 0;
-         DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
-         bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat;
+         if (i->op == OP_TXF) {
+            bld.mkOp2(OP_MIN, TYPE_U32, src, arrayIndex, bld.loadImm(NULL, 0xffff));
+         } else {
+            bld.mkCvt(OP_CVT, TYPE_U16, src, TYPE_F32, arrayIndex)->saturate = true;
+         }
       } else {
          bld.loadImm(src, 0);
       }
-- 
2.7.4