From c1ad396258288450c1725fbd359deb339ba564f9 Mon Sep 17 00:00:00 2001
From: Jeff Bolz <jbolz@nvidia.com>
Date: Wed, 10 Jul 2019 13:06:15 -0500
Subject: [PATCH] Avoid generating 8/16-bit constants when 8/16-bit arithmetic
 extensions aren't enabled

---
 Test/baseResults/spv.16bitstorage-int.frag.out  | 18 ++++-----
 Test/baseResults/spv.16bitstorage-uint.frag.out | 13 +++----
 Test/baseResults/spv.16bitstorage.frag.out      | 16 ++++----
 Test/baseResults/spv.8bitstorage-int.frag.out   | 18 ++++-----
 Test/baseResults/spv.8bitstorage-uint.frag.out  | 13 +++----
 Test/baseResults/spv.bufferhandle11.frag.out    | 10 ++---
 glslang/Include/intermediate.h                  | 21 ++++++-----
 glslang/MachineIndependent/Intermediate.cpp     | 36 ++++++------------
 glslang/MachineIndependent/ParseHelper.cpp      | 50 +++++++++++++++++++++++++
 glslang/MachineIndependent/localintermediate.h  | 21 +++++++++++
 10 files changed, 136 insertions(+), 80 deletions(-)

diff --git a/Test/baseResults/spv.16bitstorage-int.frag.out b/Test/baseResults/spv.16bitstorage-int.frag.out
index e66631c..c1aacb8 100644
--- a/Test/baseResults/spv.16bitstorage-int.frag.out
+++ b/Test/baseResults/spv.16bitstorage-int.frag.out
@@ -1,8 +1,7 @@
 spv.16bitstorage-int.frag
-Validation failed
 // Module Version 10000
 // Generated by (magic number): 80007
-// Id's are bound by 172
+// Id's are bound by 171
 
                               Capability Shader
                               Capability StorageUniformBufferBlock16
@@ -210,10 +209,7 @@ Validation failed
              114:     20(int) Constant 7
              115:     20(int) Constant 6
              116:             TypePointer Uniform 20(int)
-             166:  6(int16_t) Constant 1
-             167:  6(int16_t) Constant 2
-             168:  7(i16vec2) ConstantComposite 166 167
-             170:  6(int16_t) Constant 3
+             166:   39(ivec2) ConstantComposite 32 33
          4(main):           2 Function None 3
                5:             Label
           69(x0):     68(ptr) Variable Function
@@ -333,9 +329,11 @@ Validation failed
              164:  6(int16_t) Load 163
              165:     28(ptr) AccessChain 19(b2) 21
                               Store 165 164
-             169:     42(ptr) AccessChain 19(b2) 32
-                              Store 169 168
-             171:     28(ptr) AccessChain 19(b2) 21
-                              Store 171 170
+             167:  7(i16vec2) SConvert 166
+             168:     42(ptr) AccessChain 19(b2) 32
+                              Store 168 167
+             169:  6(int16_t) SConvert 58
+             170:     28(ptr) AccessChain 19(b2) 21
+                              Store 170 169
                               Return
                               FunctionEnd
diff --git a/Test/baseResults/spv.16bitstorage-uint.frag.out b/Test/baseResults/spv.16bitstorage-uint.frag.out
index 6289256..ba2e0c6 100644
--- a/Test/baseResults/spv.16bitstorage-uint.frag.out
+++ b/Test/baseResults/spv.16bitstorage-uint.frag.out
@@ -1,5 +1,4 @@
 spv.16bitstorage-uint.frag
-Validation failed
 // Module Version 10000
 // Generated by (magic number): 80007
 // Id's are bound by 173
@@ -211,10 +210,8 @@ Validation failed
              115:     20(int) Constant 7
              116:     20(int) Constant 6
              117:             TypePointer Uniform 9(int)
-             167:  6(int16_t) Constant 1
-             168:  6(int16_t) Constant 2
-             169:  7(i16vec2) ConstantComposite 167 168
-             171:  6(int16_t) Constant 3
+             167:   39(ivec2) ConstantComposite 82 10
+             170:      9(int) Constant 3
          4(main):           2 Function None 3
                5:             Label
           69(x0):     68(ptr) Variable Function
@@ -334,8 +331,10 @@ Validation failed
              165:  6(int16_t) Load 164
              166:     28(ptr) AccessChain 19(b2) 21
                               Store 166 165
-             170:     42(ptr) AccessChain 19(b2) 32
-                              Store 170 169
+             168:  7(i16vec2) UConvert 167
+             169:     42(ptr) AccessChain 19(b2) 32
+                              Store 169 168
+             171:  6(int16_t) UConvert 170
              172:     28(ptr) AccessChain 19(b2) 21
                               Store 172 171
                               Return
diff --git a/Test/baseResults/spv.16bitstorage.frag.out b/Test/baseResults/spv.16bitstorage.frag.out
index 133ceed..5530cf4 100644
--- a/Test/baseResults/spv.16bitstorage.frag.out
+++ b/Test/baseResults/spv.16bitstorage.frag.out
@@ -1,8 +1,7 @@
 spv.16bitstorage.frag
-Validation failed
 // Module Version 10000
 // Generated by (magic number): 80007
-// Id's are bound by 172
+// Id's are bound by 173
 
                               Capability Shader
                               Capability StorageUniformBufferBlock16
@@ -210,10 +209,9 @@ Validation failed
              114:     20(int) Constant 7
              115:     20(int) Constant 6
              116:             TypePointer Uniform 20(int)
-             166:6(float16_t) Constant 15360
-             167:6(float16_t) Constant 16384
-             168:  7(f16vec2) ConstantComposite 166 167
-             170:6(float16_t) Constant 16896
+             166:   37(float) Constant 1073741824
+             167:   40(fvec2) ConstantComposite 83 166
+             170:   37(float) Constant 1077936128
          4(main):           2 Function None 3
                5:             Label
           70(x0):     69(ptr) Variable Function
@@ -333,9 +331,11 @@ Validation failed
              164:6(float16_t) Load 163
              165:     28(ptr) AccessChain 19(b2) 21
                               Store 165 164
+             168:  7(f16vec2) FConvert 167
              169:     43(ptr) AccessChain 19(b2) 32
                               Store 169 168
-             171:     28(ptr) AccessChain 19(b2) 21
-                              Store 171 170
+             171:6(float16_t) FConvert 170
+             172:     28(ptr) AccessChain 19(b2) 21
+                              Store 172 171
                               Return
                               FunctionEnd
diff --git a/Test/baseResults/spv.8bitstorage-int.frag.out b/Test/baseResults/spv.8bitstorage-int.frag.out
index 4c8bf13..47e854f 100644
--- a/Test/baseResults/spv.8bitstorage-int.frag.out
+++ b/Test/baseResults/spv.8bitstorage-int.frag.out
@@ -1,8 +1,7 @@
 spv.8bitstorage-int.frag
-Validation failed
 // Module Version 10000
 // Generated by (magic number): 80007
-// Id's are bound by 172
+// Id's are bound by 171
 
                               Capability Shader
                               Capability UniformAndStorageBuffer8BitAccess
@@ -209,10 +208,7 @@ Validation failed
              114:     20(int) Constant 7
              115:     20(int) Constant 6
              116:             TypePointer Uniform 20(int)
-             166:   6(int8_t) Constant 1
-             167:   6(int8_t) Constant 2
-             168:   7(i8vec2) ConstantComposite 166 167
-             170:   6(int8_t) Constant 3
+             166:   39(ivec2) ConstantComposite 32 33
          4(main):           2 Function None 3
                5:             Label
           69(x0):     68(ptr) Variable Function
@@ -332,9 +328,11 @@ Validation failed
              164:   6(int8_t) Load 163
              165:     28(ptr) AccessChain 19(b2) 21
                               Store 165 164
-             169:     42(ptr) AccessChain 19(b2) 32
-                              Store 169 168
-             171:     28(ptr) AccessChain 19(b2) 21
-                              Store 171 170
+             167:   7(i8vec2) SConvert 166
+             168:     42(ptr) AccessChain 19(b2) 32
+                              Store 168 167
+             169:   6(int8_t) SConvert 58
+             170:     28(ptr) AccessChain 19(b2) 21
+                              Store 170 169
                               Return
                               FunctionEnd
diff --git a/Test/baseResults/spv.8bitstorage-uint.frag.out b/Test/baseResults/spv.8bitstorage-uint.frag.out
index 10c2b2a..a66c6a3 100644
--- a/Test/baseResults/spv.8bitstorage-uint.frag.out
+++ b/Test/baseResults/spv.8bitstorage-uint.frag.out
@@ -1,5 +1,4 @@
 spv.8bitstorage-uint.frag
-Validation failed
 // Module Version 10000
 // Generated by (magic number): 80007
 // Id's are bound by 173
@@ -210,10 +209,8 @@ Validation failed
              115:     20(int) Constant 7
              116:     20(int) Constant 6
              117:             TypePointer Uniform 9(int)
-             167:   6(int8_t) Constant 1
-             168:   6(int8_t) Constant 2
-             169:   7(i8vec2) ConstantComposite 167 168
-             171:   6(int8_t) Constant 3
+             167:   39(ivec2) ConstantComposite 82 10
+             170:      9(int) Constant 3
          4(main):           2 Function None 3
                5:             Label
           69(x0):     68(ptr) Variable Function
@@ -333,8 +330,10 @@ Validation failed
              165:   6(int8_t) Load 164
              166:     28(ptr) AccessChain 19(b2) 21
                               Store 166 165
-             170:     42(ptr) AccessChain 19(b2) 32
-                              Store 170 169
+             168:   7(i8vec2) UConvert 167
+             169:     42(ptr) AccessChain 19(b2) 32
+                              Store 169 168
+             171:   6(int8_t) UConvert 170
              172:     28(ptr) AccessChain 19(b2) 21
                               Store 172 171
                               Return
diff --git a/Test/baseResults/spv.bufferhandle11.frag.out b/Test/baseResults/spv.bufferhandle11.frag.out
index 0cc97ac..3469715 100644
--- a/Test/baseResults/spv.bufferhandle11.frag.out
+++ b/Test/baseResults/spv.bufferhandle11.frag.out
@@ -2,10 +2,9 @@ spv.bufferhandle11.frag
 WARNING: 0:6: '' : all default precisions are highp; use precision statements to quiet warning, e.g.:
          "precision mediump int; precision highp float;" 
 
-Validation failed
 // Module Version 10000
 // Generated by (magic number): 80007
-// Id's are bound by 60
+// Id's are bound by 61
 
                               Capability Shader
                               Capability StorageBuffer8BitAccess
@@ -69,7 +68,7 @@ Validation failed
               50:     49(ptr) Variable StorageBuffer
               51:             TypePointer StorageBuffer 6(int)
               54:     31(int) Constant 1
-              58:  27(int8_t) Constant 9
+              58:      6(int) Constant 9
          4(main):           2 Function None 3
                5:             Label
        20(allOk):     19(ptr) Variable Function
@@ -104,8 +103,9 @@ Validation failed
               47:             Label
               56:     33(ptr) AccessChain 30 32
               57:     25(ptr) Load 56
-              59:     36(ptr) AccessChain 57 32
-                              Store 59 58 Aligned 16
+              59:  27(int8_t) UConvert 58
+              60:     36(ptr) AccessChain 57 32
+                              Store 60 59 Aligned 16
                               Return
                               FunctionEnd
 12(compare_uint8_t(u1;u1;):     8(bool) Function None 9
diff --git a/glslang/Include/intermediate.h b/glslang/Include/intermediate.h
index 903a75d..cbc9cd6 100644
--- a/glslang/Include/intermediate.h
+++ b/glslang/Include/intermediate.h
@@ -654,9 +654,21 @@ enum TOperator {
     EOpConstructBool,
     EOpConstructFloat,
     EOpConstructDouble,
+    // Keep vector and matrix constructors in a consistent relative order for
+    // TParseContext::constructBuiltIn, which converts between 8/16/32 bit
+    // vector constructors
     EOpConstructVec2,
     EOpConstructVec3,
     EOpConstructVec4,
+    EOpConstructMat2x2,
+    EOpConstructMat2x3,
+    EOpConstructMat2x4,
+    EOpConstructMat3x2,
+    EOpConstructMat3x3,
+    EOpConstructMat3x4,
+    EOpConstructMat4x2,
+    EOpConstructMat4x3,
+    EOpConstructMat4x4,
     EOpConstructDVec2,
     EOpConstructDVec3,
     EOpConstructDVec4,
@@ -687,15 +699,6 @@ enum TOperator {
     EOpConstructU64Vec2,
     EOpConstructU64Vec3,
     EOpConstructU64Vec4,
-    EOpConstructMat2x2,
-    EOpConstructMat2x3,
-    EOpConstructMat2x4,
-    EOpConstructMat3x2,
-    EOpConstructMat3x3,
-    EOpConstructMat3x4,
-    EOpConstructMat4x2,
-    EOpConstructMat4x3,
-    EOpConstructMat4x4,
     EOpConstructDMat2x2,
     EOpConstructDMat2x3,
     EOpConstructDMat2x4,
diff --git a/glslang/MachineIndependent/Intermediate.cpp b/glslang/MachineIndependent/Intermediate.cpp
index d841c90..1dd6dce 100644
--- a/glslang/MachineIndependent/Intermediate.cpp
+++ b/glslang/MachineIndependent/Intermediate.cpp
@@ -574,24 +574,6 @@ TIntermTyped* TIntermediate::createConversion(TBasicType convertTo, TIntermTyped
 
     TOperator newOp = EOpNull;
 
-    // Certain explicit conversions are allowed conditionally
-    bool arithemeticInt8Enabled = extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types) ||
-                                  extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types_int8);
-#ifdef AMD_EXTENSIONS
-    bool arithemeticInt16Enabled = extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types) ||
-                                   extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types_int16) ||
-                                   extensionRequested(E_GL_AMD_gpu_shader_int16);
-
-    bool arithemeticFloat16Enabled = extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types) ||
-                                     extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types_float16) ||
-                                     extensionRequested(E_GL_AMD_gpu_shader_half_float);
-#else
-    bool arithemeticInt16Enabled = extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types) ||
-                                   extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types_int16);
-
-    bool arithemeticFloat16Enabled = extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types) ||
-                                     extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types_float16);
-#endif
     bool convertToIntTypes = (convertTo == EbtInt8  || convertTo == EbtUint8  ||
                               convertTo == EbtInt16 || convertTo == EbtUint16 ||
                               convertTo == EbtInt   || convertTo == EbtUint   ||
@@ -608,19 +590,19 @@ TIntermTyped* TIntermediate::createConversion(TBasicType convertTo, TIntermTyped
                                   node->getBasicType() == EbtFloat ||
                                   node->getBasicType() == EbtDouble);
 
-    if (! arithemeticInt8Enabled) {
+    if (! getArithemeticInt8Enabled()) {
         if (((convertTo == EbtInt8 || convertTo == EbtUint8) && ! convertFromIntTypes) ||
             ((node->getBasicType() == EbtInt8 || node->getBasicType() == EbtUint8) && ! convertToIntTypes))
             return nullptr;
     }
 
-    if (! arithemeticInt16Enabled) {
+    if (! getArithemeticInt16Enabled()) {
         if (((convertTo == EbtInt16 || convertTo == EbtUint16) && ! convertFromIntTypes) ||
             ((node->getBasicType() == EbtInt16 || node->getBasicType() == EbtUint16) && ! convertToIntTypes))
             return nullptr;
     }
 
-    if (! arithemeticFloat16Enabled) {
+    if (! getArithemeticFloat16Enabled()) {
         if ((convertTo == EbtFloat16 && ! convertFromFloatTypes) ||
             (node->getBasicType() == EbtFloat16 && ! convertToFloatTypes))
             return nullptr;
@@ -841,9 +823,15 @@ TIntermTyped* TIntermediate::createConversion(TBasicType convertTo, TIntermTyped
     newNode = addUnaryNode(newOp, node, node->getLoc(), newType);
 
     if (node->getAsConstantUnion()) {
-        TIntermTyped* folded = node->getAsConstantUnion()->fold(newOp, newType);
-        if (folded)
-            return folded;
+        // 8/16-bit storage extensions don't support 8/16-bit constants, so don't fold conversions
+        // to those types
+        if ((getArithemeticInt8Enabled() || !(convertTo == EbtInt8 || convertTo == EbtUint8)) &&
+            (getArithemeticInt16Enabled() || !(convertTo == EbtInt16 || convertTo == EbtUint16)) &&
+            (getArithemeticFloat16Enabled() || !(convertTo == EbtFloat16))) {
+            TIntermTyped* folded = node->getAsConstantUnion()->fold(newOp, newType);
+            if (folded)
+                return folded;
+        }
     }
 
     // Propagate specialization-constant-ness, if allowed
diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp
index 57cc5f9..276f4b7 100644
--- a/glslang/MachineIndependent/ParseHelper.cpp
+++ b/glslang/MachineIndependent/ParseHelper.cpp
@@ -6948,6 +6948,16 @@ TIntermTyped* TParseContext::constructBuiltIn(const TType& type, TOperator op, T
     case EOpConstructF16Mat4x4:
     case EOpConstructFloat16:
         basicOp = EOpConstructFloat16;
+        // 8/16-bit storage extensions don't support constructing composites of 8/16-bit types,
+        // so construct a 32-bit type and convert
+        if (!intermediate.getArithemeticFloat16Enabled()) {
+            TType tempType(EbtFloat, EvqTemporary, type.getVectorSize());
+            newNode = node;
+            if (tempType != newNode->getType())
+                newNode = intermediate.setAggregateOperator(newNode, (TOperator)(EOpConstructVec2 + op - EOpConstructF16Vec2), tempType, node->getLoc());
+            newNode = intermediate.addConversion(EbtFloat16, newNode);
+            return newNode;
+        }
         break;
 
     case EOpConstructI8Vec2:
@@ -6955,6 +6965,16 @@ TIntermTyped* TParseContext::constructBuiltIn(const TType& type, TOperator op, T
     case EOpConstructI8Vec4:
     case EOpConstructInt8:
         basicOp = EOpConstructInt8;
+        // 8/16-bit storage extensions don't support constructing composites of 8/16-bit types,
+        // so construct a 32-bit type and convert
+        if (!intermediate.getArithemeticInt8Enabled()) {
+            TType tempType(EbtInt, EvqTemporary, type.getVectorSize());
+            newNode = node;
+            if (tempType != newNode->getType())
+                newNode = intermediate.setAggregateOperator(newNode, (TOperator)(EOpConstructIVec2 + op - EOpConstructI8Vec2), tempType, node->getLoc());
+            newNode = intermediate.addConversion(EbtInt8, newNode);
+            return newNode;
+        }
         break;
 
     case EOpConstructU8Vec2:
@@ -6962,6 +6982,16 @@ TIntermTyped* TParseContext::constructBuiltIn(const TType& type, TOperator op, T
     case EOpConstructU8Vec4:
     case EOpConstructUint8:
         basicOp = EOpConstructUint8;
+        // 8/16-bit storage extensions don't support constructing composites of 8/16-bit types,
+        // so construct a 32-bit type and convert
+        if (!intermediate.getArithemeticInt8Enabled()) {
+            TType tempType(EbtUint, EvqTemporary, type.getVectorSize());
+            newNode = node;
+            if (tempType != newNode->getType())
+                newNode = intermediate.setAggregateOperator(newNode, (TOperator)(EOpConstructUVec2 + op - EOpConstructU8Vec2), tempType, node->getLoc());
+            newNode = intermediate.addConversion(EbtUint8, newNode);
+            return newNode;
+        }
         break;
 
     case EOpConstructI16Vec2:
@@ -6969,6 +6999,16 @@ TIntermTyped* TParseContext::constructBuiltIn(const TType& type, TOperator op, T
     case EOpConstructI16Vec4:
     case EOpConstructInt16:
         basicOp = EOpConstructInt16;
+        // 8/16-bit storage extensions don't support constructing composites of 8/16-bit types,
+        // so construct a 32-bit type and convert
+        if (!intermediate.getArithemeticInt16Enabled()) {
+            TType tempType(EbtInt, EvqTemporary, type.getVectorSize());
+            newNode = node;
+            if (tempType != newNode->getType())
+                newNode = intermediate.setAggregateOperator(newNode, (TOperator)(EOpConstructIVec2 + op - EOpConstructI16Vec2), tempType, node->getLoc());
+            newNode = intermediate.addConversion(EbtInt16, newNode);
+            return newNode;
+        }
         break;
 
     case EOpConstructU16Vec2:
@@ -6976,6 +7016,16 @@ TIntermTyped* TParseContext::constructBuiltIn(const TType& type, TOperator op, T
     case EOpConstructU16Vec4:
     case EOpConstructUint16:
         basicOp = EOpConstructUint16;
+        // 8/16-bit storage extensions don't support constructing composites of 8/16-bit types,
+        // so construct a 32-bit type and convert
+        if (!intermediate.getArithemeticInt16Enabled()) {
+            TType tempType(EbtUint, EvqTemporary, type.getVectorSize());
+            newNode = node;
+            if (tempType != newNode->getType())
+                newNode = intermediate.setAggregateOperator(newNode, (TOperator)(EOpConstructUVec2 + op - EOpConstructU16Vec2), tempType, node->getLoc());
+            newNode = intermediate.addConversion(EbtUint16, newNode);
+            return newNode;
+        }
         break;
 
     case EOpConstructIVec2:
diff --git a/glslang/MachineIndependent/localintermediate.h b/glslang/MachineIndependent/localintermediate.h
index 7b8bf7f..eb0cc7a 100644
--- a/glslang/MachineIndependent/localintermediate.h
+++ b/glslang/MachineIndependent/localintermediate.h
@@ -780,6 +780,27 @@ public:
     const char* const implicitThisName;
     const char* const implicitCounterName;
 
+    // Certain explicit conversions are allowed conditionally: only when a matching 8/16-bit arithmetic extension was requested
+    bool getArithemeticInt8Enabled() const { // 8-bit ints: E_GL_EXT_shader_explicit_arithmetic_types or its _int8 variant
+        return extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types) ||
+               extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types_int8);
+    }
+    bool getArithemeticInt16Enabled() const { // 16-bit ints: true if any of the extensions checked below was requested
+        return extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types) ||
+#ifdef AMD_EXTENSIONS
+               extensionRequested(E_GL_AMD_gpu_shader_int16) || // only consulted when built with AMD_EXTENSIONS
+#endif
+               extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types_int16);
+    }
+
+    bool getArithemeticFloat16Enabled() const { // 16-bit floats: true if any of the extensions checked below was requested
+        return extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types) ||
+#ifdef AMD_EXTENSIONS
+               extensionRequested(E_GL_AMD_gpu_shader_half_float) || // only consulted when built with AMD_EXTENSIONS
+#endif
+               extensionRequested(E_GL_EXT_shader_explicit_arithmetic_types_float16);
+    }
+
 protected:
     TIntermSymbol* addSymbol(int Id, const TString&, const TType&, const TConstUnionArray&, TIntermTyped* subtree, const TSourceLoc&);
     void error(TInfoSink& infoSink, const char*);
-- 
2.7.4