Add support for SPV_NV_compute_shader_derivatives
author: Chao Chen <chaoc@nvidia.com>
Wed, 19 Sep 2018 18:40:45 +0000 (11:40 -0700)
committer: Chao Chen <chaoc@nvidia.com>
Wed, 19 Sep 2018 20:07:42 +0000 (13:07 -0700)
14 files changed:
SPIRV/GLSL.ext.NV.h
SPIRV/GlslangToSpv.cpp
SPIRV/doc.cpp
Test/baseResults/spv.computeShaderDerivatives.comp.out [new file with mode: 0644]
Test/baseResults/spv.computeShaderDerivatives2.comp.out [new file with mode: 0644]
Test/spv.computeShaderDerivatives.comp [new file with mode: 0644]
Test/spv.computeShaderDerivatives2.comp [new file with mode: 0644]
glslang/Include/Types.h
glslang/MachineIndependent/Initialize.cpp
glslang/MachineIndependent/ParseHelper.cpp
glslang/MachineIndependent/Versions.cpp
glslang/MachineIndependent/Versions.h
glslang/MachineIndependent/localintermediate.h
gtests/Spv.FromFile.cpp

diff --git a/SPIRV/GLSL.ext.NV.h b/SPIRV/GLSL.ext.NV.h
index 8b1faff..ddef3dd 100644 (file)
@@ -33,7 +33,7 @@ enum Op;
 enum Capability;
 
 static const int GLSLextNVVersion = 100;
-static const int GLSLextNVRevision = 6;
+static const int GLSLextNVRevision = 7;
 
 //SPV_NV_sample_mask_override_coverage
 const char* const E_SPV_NV_sample_mask_override_coverage = "SPV_NV_sample_mask_override_coverage";
@@ -57,4 +57,7 @@ const char* const E_SPV_NV_shader_subgroup_partitioned = "SPV_NV_shader_subgroup
 //SPV_NV_fragment_shader_barycentric
 const char* const E_SPV_NV_fragment_shader_barycentric = "SPV_NV_fragment_shader_barycentric";
 
+//SPV_NV_compute_shader_derivatives
+const char* const E_SPV_NV_compute_shader_derivatives = "SPV_NV_compute_shader_derivatives";
+
 #endif  // #ifndef GLSLextNV_H
\ No newline at end of file
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
index c04a6c5..4911920 100755 (executable)
@@ -1300,6 +1300,17 @@ TGlslangToSpvTraverser::TGlslangToSpvTraverser(unsigned int spvVersion, const gl
         builder.addExecutionMode(shaderEntry, spv::ExecutionModeLocalSize, glslangIntermediate->getLocalSize(0),
                                                                            glslangIntermediate->getLocalSize(1),
                                                                            glslangIntermediate->getLocalSize(2));
+#ifdef NV_EXTENSIONS
+        if (glslangIntermediate->getLayoutDerivativeModeNone() == glslang::LayoutDerivativeGroupQuads) {
+            builder.addCapability(spv::CapabilityComputeDerivativeGroupQuadsNV);
+            builder.addExecutionMode(shaderEntry, spv::ExecutionModeDerivativeGroupQuadsNV);
+            builder.addExtension(spv::E_SPV_NV_compute_shader_derivatives);
+        } else if (glslangIntermediate->getLayoutDerivativeModeNone() == glslang::LayoutDerivativeGroupLinear) {
+            builder.addCapability(spv::CapabilityComputeDerivativeGroupLinearNV);
+            builder.addExecutionMode(shaderEntry, spv::ExecutionModeDerivativeGroupLinearNV);
+            builder.addExtension(spv::E_SPV_NV_compute_shader_derivatives);
+        }
+#endif
         break;
 
     default:
@@ -4093,7 +4104,13 @@ spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermO
     if (cracked.lod) {
         params.lod = arguments[2 + extraArgs];
         ++extraArgs;
-    } else if (glslangIntermediate->getStage() != EShLangFragment) {
+    } else if (glslangIntermediate->getStage() != EShLangFragment
+#ifdef NV_EXTENSIONS
+        // NV_compute_shader_derivatives layout qualifiers allow for implicit LODs
+           && !(glslangIntermediate->getStage() == EShLangCompute &&
+                (glslangIntermediate->getLayoutDerivativeModeNone() != glslang::LayoutDerivativeNone))
+#endif
+        ) {
         // we need to invent the default lod for an explicit lod instruction for a non-fragment stage
         noImplicitLod = true;
     }
diff --git a/SPIRV/doc.cpp b/SPIRV/doc.cpp
index cd33103..4aaf790 100644 (file)
@@ -166,6 +166,12 @@ const char* ExecutionModeString(int mode)
     case 32: return "Bad";
 
     case 4446:  return "PostDepthCoverage";
+
+#ifdef NV_EXTENSIONS
+    case ExecutionModeDerivativeGroupQuadsNV:   return "DerivativeGroupQuadsNV";
+    case ExecutionModeDerivativeGroupLinearNV:  return "DerivativeGroupLinearNV";
+#endif
+
     case ExecutionModeCeiling:
     default: return "Bad";
     }
@@ -823,13 +829,15 @@ const char* CapabilityString(int info)
 
     case CapabilitySampleMaskPostDepthCoverage:  return "SampleMaskPostDepthCoverage";
 #ifdef NV_EXTENSIONS
-    case CapabilityGeometryShaderPassthroughNV:  return "GeometryShaderPassthroughNV";
-    case CapabilityShaderViewportIndexLayerNV:   return "ShaderViewportIndexLayerNV";
-    case CapabilityShaderViewportMaskNV:         return "ShaderViewportMaskNV";
-    case CapabilityShaderStereoViewNV:           return "ShaderStereoViewNV";
-    case CapabilityPerViewAttributesNV:          return "PerViewAttributesNV";
-    case CapabilityGroupNonUniformPartitionedNV: return "GroupNonUniformPartitionedNV";
-    case CapabilityFragmentBarycentricNV:        return "FragmentBarycentricNV";
+    case CapabilityGeometryShaderPassthroughNV:     return "GeometryShaderPassthroughNV";
+    case CapabilityShaderViewportIndexLayerNV:      return "ShaderViewportIndexLayerNV";
+    case CapabilityShaderViewportMaskNV:            return "ShaderViewportMaskNV";
+    case CapabilityShaderStereoViewNV:              return "ShaderStereoViewNV";
+    case CapabilityPerViewAttributesNV:             return "PerViewAttributesNV";
+    case CapabilityGroupNonUniformPartitionedNV:    return "GroupNonUniformPartitionedNV";
+    case CapabilityComputeDerivativeGroupQuadsNV:   return "ComputeDerivativeGroupQuadsNV";
+    case CapabilityComputeDerivativeGroupLinearNV:  return "ComputeDerivativeGroupLinearNV";
+    case CapabilityFragmentBarycentricNV:           return "FragmentBarycentricNV";
 #endif
 
     case CapabilityFragmentFullyCoveredEXT: return "FragmentFullyCoveredEXT";
diff --git a/Test/baseResults/spv.computeShaderDerivatives.comp.out b/Test/baseResults/spv.computeShaderDerivatives.comp.out
new file mode 100644 (file)
index 0000000..f05dbc0
--- /dev/null
@@ -0,0 +1,358 @@
+spv.computeShaderDerivatives.comp
+// Module Version 10000
+// Generated by (magic number): 80007
+// Id's are bound by 212
+
+                              Capability Shader
+                              Capability DerivativeControl
+                              Capability ComputeDerivativeGroupQuadsNV
+                              Extension  "SPV_NV_compute_shader_derivatives"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main"
+                              ExecutionMode 4 LocalSize 2 4 1
+                              ExecutionMode 4 DerivativeGroupQuadsNV
+                              Source GLSL 450
+                              SourceExtension  "GL_NV_compute_shader_derivatives"
+                              Name 4  "main"
+                              Name 10  "block"
+                              MemberName 10(block) 0  "fDerivativeX"
+                              MemberName 10(block) 1  "fDerivativeY"
+                              MemberName 10(block) 2  "fDerivativeWidth"
+                              MemberName 10(block) 3  "fCoarseDerivativeX"
+                              MemberName 10(block) 4  "fCoarseDerivativeY"
+                              MemberName 10(block) 5  "fCoarseDerivativeWidth"
+                              MemberName 10(block) 6  "fFineDerivativeX"
+                              MemberName 10(block) 7  "fFineDerivativeY"
+                              MemberName 10(block) 8  "fFineDerivativeWidth"
+                              MemberName 10(block) 9  "fX"
+                              MemberName 10(block) 10  "fY"
+                              MemberName 10(block) 11  "v2DerivativeX"
+                              MemberName 10(block) 12  "v2DerivativeY"
+                              MemberName 10(block) 13  "v2DerivativeWidth"
+                              MemberName 10(block) 14  "v2CoarseDerivativeX"
+                              MemberName 10(block) 15  "v2CoarseDerivativeY"
+                              MemberName 10(block) 16  "v2CoarseDerivativeWidth"
+                              MemberName 10(block) 17  "v2FineDerivativeX"
+                              MemberName 10(block) 18  "v2FineDerivativeY"
+                              MemberName 10(block) 19  "v2FineDerivativeWidth"
+                              MemberName 10(block) 20  "v2X"
+                              MemberName 10(block) 21  "v2Y"
+                              MemberName 10(block) 22  "v3DerivativeX"
+                              MemberName 10(block) 23  "v3DerivativeY"
+                              MemberName 10(block) 24  "v3DerivativeWidth"
+                              MemberName 10(block) 25  "v3CoarseDerivativeX"
+                              MemberName 10(block) 26  "v3CoarseDerivativeY"
+                              MemberName 10(block) 27  "v3CoarseDerivativeWidth"
+                              MemberName 10(block) 28  "v3FineDerivativeX"
+                              MemberName 10(block) 29  "v3FineDerivativeY"
+                              MemberName 10(block) 30  "v3FineDerivativeWidth"
+                              MemberName 10(block) 31  "v3X"
+                              MemberName 10(block) 32  "v3Y"
+                              MemberName 10(block) 33  "v4DerivativeX"
+                              MemberName 10(block) 34  "v4DerivativeY"
+                              MemberName 10(block) 35  "v4DerivativeWidth"
+                              MemberName 10(block) 36  "v4CoarseDerivativeX"
+                              MemberName 10(block) 37  "v4CoarseDerivativeY"
+                              MemberName 10(block) 38  "v4CoarseDerivativeWidth"
+                              MemberName 10(block) 39  "v4FineDerivativeX"
+                              MemberName 10(block) 40  "v4FineDerivativeY"
+                              MemberName 10(block) 41  "v4FineDerivativeWidth"
+                              MemberName 10(block) 42  "v4X"
+                              MemberName 10(block) 43  "v4Y"
+                              Name 12  ""
+                              MemberDecorate 10(block) 0 Offset 0
+                              MemberDecorate 10(block) 1 Offset 4
+                              MemberDecorate 10(block) 2 Offset 8
+                              MemberDecorate 10(block) 3 Offset 12
+                              MemberDecorate 10(block) 4 Offset 16
+                              MemberDecorate 10(block) 5 Offset 20
+                              MemberDecorate 10(block) 6 Offset 24
+                              MemberDecorate 10(block) 7 Offset 28
+                              MemberDecorate 10(block) 8 Offset 32
+                              MemberDecorate 10(block) 9 Offset 36
+                              MemberDecorate 10(block) 10 Offset 40
+                              MemberDecorate 10(block) 11 Offset 48
+                              MemberDecorate 10(block) 12 Offset 56
+                              MemberDecorate 10(block) 13 Offset 64
+                              MemberDecorate 10(block) 14 Offset 72
+                              MemberDecorate 10(block) 15 Offset 80
+                              MemberDecorate 10(block) 16 Offset 88
+                              MemberDecorate 10(block) 17 Offset 96
+                              MemberDecorate 10(block) 18 Offset 104
+                              MemberDecorate 10(block) 19 Offset 112
+                              MemberDecorate 10(block) 20 Offset 120
+                              MemberDecorate 10(block) 21 Offset 128
+                              MemberDecorate 10(block) 22 Offset 144
+                              MemberDecorate 10(block) 23 Offset 160
+                              MemberDecorate 10(block) 24 Offset 176
+                              MemberDecorate 10(block) 25 Offset 192
+                              MemberDecorate 10(block) 26 Offset 208
+                              MemberDecorate 10(block) 27 Offset 224
+                              MemberDecorate 10(block) 28 Offset 240
+                              MemberDecorate 10(block) 29 Offset 256
+                              MemberDecorate 10(block) 30 Offset 272
+                              MemberDecorate 10(block) 31 Offset 288
+                              MemberDecorate 10(block) 32 Offset 304
+                              MemberDecorate 10(block) 33 Offset 320
+                              MemberDecorate 10(block) 34 Offset 336
+                              MemberDecorate 10(block) 35 Offset 352
+                              MemberDecorate 10(block) 36 Offset 368
+                              MemberDecorate 10(block) 37 Offset 384
+                              MemberDecorate 10(block) 38 Offset 400
+                              MemberDecorate 10(block) 39 Offset 416
+                              MemberDecorate 10(block) 40 Offset 432
+                              MemberDecorate 10(block) 41 Offset 448
+                              MemberDecorate 10(block) 42 Offset 464
+                              MemberDecorate 10(block) 43 Offset 480
+                              Decorate 10(block) BufferBlock
+                              Decorate 12 DescriptorSet 0
+                              Decorate 211 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 2
+               8:             TypeVector 6(float) 3
+               9:             TypeVector 6(float) 4
+       10(block):             TypeStruct 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4)
+              11:             TypePointer Uniform 10(block)
+              12:     11(ptr) Variable Uniform
+              13:             TypeInt 32 1
+              14:     13(int) Constant 0
+              15:     13(int) Constant 9
+              16:             TypePointer Uniform 6(float)
+              21:     13(int) Constant 1
+              22:     13(int) Constant 10
+              27:     13(int) Constant 2
+              32:     13(int) Constant 3
+              37:     13(int) Constant 4
+              42:     13(int) Constant 5
+              47:     13(int) Constant 6
+              52:     13(int) Constant 7
+              57:     13(int) Constant 8
+              62:     13(int) Constant 11
+              63:     13(int) Constant 20
+              64:             TypePointer Uniform 7(fvec2)
+              69:     13(int) Constant 12
+              70:     13(int) Constant 21
+              75:     13(int) Constant 13
+              80:     13(int) Constant 14
+              85:     13(int) Constant 15
+              90:     13(int) Constant 16
+              95:     13(int) Constant 17
+             100:     13(int) Constant 18
+             105:     13(int) Constant 19
+             110:     13(int) Constant 22
+             111:     13(int) Constant 31
+             112:             TypePointer Uniform 8(fvec3)
+             117:     13(int) Constant 23
+             118:     13(int) Constant 32
+             123:     13(int) Constant 24
+             128:     13(int) Constant 25
+             133:     13(int) Constant 26
+             138:     13(int) Constant 27
+             143:     13(int) Constant 28
+             148:     13(int) Constant 29
+             153:     13(int) Constant 30
+             158:     13(int) Constant 33
+             159:     13(int) Constant 42
+             160:             TypePointer Uniform 9(fvec4)
+             165:     13(int) Constant 34
+             166:     13(int) Constant 43
+             171:     13(int) Constant 35
+             176:     13(int) Constant 36
+             181:     13(int) Constant 37
+             186:     13(int) Constant 38
+             191:     13(int) Constant 39
+             196:     13(int) Constant 40
+             201:     13(int) Constant 41
+             206:             TypeInt 32 0
+             207:             TypeVector 206(int) 3
+             208:    206(int) Constant 2
+             209:    206(int) Constant 4
+             210:    206(int) Constant 1
+             211:  207(ivec3) ConstantComposite 208 209 210
+         4(main):           2 Function None 3
+               5:             Label
+              17:     16(ptr) AccessChain 12 15
+              18:    6(float) Load 17
+              19:    6(float) DPdx 18
+              20:     16(ptr) AccessChain 12 14
+                              Store 20 19
+              23:     16(ptr) AccessChain 12 22
+              24:    6(float) Load 23
+              25:    6(float) DPdy 24
+              26:     16(ptr) AccessChain 12 21
+                              Store 26 25
+              28:     16(ptr) AccessChain 12 15
+              29:    6(float) Load 28
+              30:    6(float) Fwidth 29
+              31:     16(ptr) AccessChain 12 27
+                              Store 31 30
+              33:     16(ptr) AccessChain 12 15
+              34:    6(float) Load 33
+              35:    6(float) DPdxCoarse 34
+              36:     16(ptr) AccessChain 12 32
+                              Store 36 35
+              38:     16(ptr) AccessChain 12 22
+              39:    6(float) Load 38
+              40:    6(float) DPdyCoarse 39
+              41:     16(ptr) AccessChain 12 37
+                              Store 41 40
+              43:     16(ptr) AccessChain 12 15
+              44:    6(float) Load 43
+              45:    6(float) FwidthCoarse 44
+              46:     16(ptr) AccessChain 12 42
+                              Store 46 45
+              48:     16(ptr) AccessChain 12 15
+              49:    6(float) Load 48
+              50:    6(float) DPdxFine 49
+              51:     16(ptr) AccessChain 12 47
+                              Store 51 50
+              53:     16(ptr) AccessChain 12 22
+              54:    6(float) Load 53
+              55:    6(float) DPdyFine 54
+              56:     16(ptr) AccessChain 12 52
+                              Store 56 55
+              58:     16(ptr) AccessChain 12 15
+              59:    6(float) Load 58
+              60:    6(float) FwidthFine 59
+              61:     16(ptr) AccessChain 12 57
+                              Store 61 60
+              65:     64(ptr) AccessChain 12 63
+              66:    7(fvec2) Load 65
+              67:    7(fvec2) DPdx 66
+              68:     64(ptr) AccessChain 12 62
+                              Store 68 67
+              71:     64(ptr) AccessChain 12 70
+              72:    7(fvec2) Load 71
+              73:    7(fvec2) DPdy 72
+              74:     64(ptr) AccessChain 12 69
+                              Store 74 73
+              76:     64(ptr) AccessChain 12 63
+              77:    7(fvec2) Load 76
+              78:    7(fvec2) Fwidth 77
+              79:     64(ptr) AccessChain 12 75
+                              Store 79 78
+              81:     64(ptr) AccessChain 12 63
+              82:    7(fvec2) Load 81
+              83:    7(fvec2) DPdxCoarse 82
+              84:     64(ptr) AccessChain 12 80
+                              Store 84 83
+              86:     64(ptr) AccessChain 12 70
+              87:    7(fvec2) Load 86
+              88:    7(fvec2) DPdyCoarse 87
+              89:     64(ptr) AccessChain 12 85
+                              Store 89 88
+              91:     64(ptr) AccessChain 12 63
+              92:    7(fvec2) Load 91
+              93:    7(fvec2) FwidthCoarse 92
+              94:     64(ptr) AccessChain 12 90
+                              Store 94 93
+              96:     64(ptr) AccessChain 12 63
+              97:    7(fvec2) Load 96
+              98:    7(fvec2) DPdxFine 97
+              99:     64(ptr) AccessChain 12 95
+                              Store 99 98
+             101:     64(ptr) AccessChain 12 70
+             102:    7(fvec2) Load 101
+             103:    7(fvec2) DPdyFine 102
+             104:     64(ptr) AccessChain 12 100
+                              Store 104 103
+             106:     64(ptr) AccessChain 12 63
+             107:    7(fvec2) Load 106
+             108:    7(fvec2) FwidthFine 107
+             109:     64(ptr) AccessChain 12 105
+                              Store 109 108
+             113:    112(ptr) AccessChain 12 111
+             114:    8(fvec3) Load 113
+             115:    8(fvec3) DPdx 114
+             116:    112(ptr) AccessChain 12 110
+                              Store 116 115
+             119:    112(ptr) AccessChain 12 118
+             120:    8(fvec3) Load 119
+             121:    8(fvec3) DPdy 120
+             122:    112(ptr) AccessChain 12 117
+                              Store 122 121
+             124:    112(ptr) AccessChain 12 111
+             125:    8(fvec3) Load 124
+             126:    8(fvec3) Fwidth 125
+             127:    112(ptr) AccessChain 12 123
+                              Store 127 126
+             129:    112(ptr) AccessChain 12 111
+             130:    8(fvec3) Load 129
+             131:    8(fvec3) DPdxCoarse 130
+             132:    112(ptr) AccessChain 12 128
+                              Store 132 131
+             134:    112(ptr) AccessChain 12 118
+             135:    8(fvec3) Load 134
+             136:    8(fvec3) DPdyCoarse 135
+             137:    112(ptr) AccessChain 12 133
+                              Store 137 136
+             139:    112(ptr) AccessChain 12 111
+             140:    8(fvec3) Load 139
+             141:    8(fvec3) FwidthCoarse 140
+             142:    112(ptr) AccessChain 12 138
+                              Store 142 141
+             144:    112(ptr) AccessChain 12 111
+             145:    8(fvec3) Load 144
+             146:    8(fvec3) DPdxFine 145
+             147:    112(ptr) AccessChain 12 143
+                              Store 147 146
+             149:    112(ptr) AccessChain 12 118
+             150:    8(fvec3) Load 149
+             151:    8(fvec3) DPdyFine 150
+             152:    112(ptr) AccessChain 12 148
+                              Store 152 151
+             154:    112(ptr) AccessChain 12 111
+             155:    8(fvec3) Load 154
+             156:    8(fvec3) FwidthFine 155
+             157:    112(ptr) AccessChain 12 153
+                              Store 157 156
+             161:    160(ptr) AccessChain 12 159
+             162:    9(fvec4) Load 161
+             163:    9(fvec4) DPdx 162
+             164:    160(ptr) AccessChain 12 158
+                              Store 164 163
+             167:    160(ptr) AccessChain 12 166
+             168:    9(fvec4) Load 167
+             169:    9(fvec4) DPdy 168
+             170:    160(ptr) AccessChain 12 165
+                              Store 170 169
+             172:    160(ptr) AccessChain 12 159
+             173:    9(fvec4) Load 172
+             174:    9(fvec4) Fwidth 173
+             175:    160(ptr) AccessChain 12 171
+                              Store 175 174
+             177:    160(ptr) AccessChain 12 159
+             178:    9(fvec4) Load 177
+             179:    9(fvec4) DPdxCoarse 178
+             180:    160(ptr) AccessChain 12 176
+                              Store 180 179
+             182:    160(ptr) AccessChain 12 166
+             183:    9(fvec4) Load 182
+             184:    9(fvec4) DPdyCoarse 183
+             185:    160(ptr) AccessChain 12 181
+                              Store 185 184
+             187:    160(ptr) AccessChain 12 159
+             188:    9(fvec4) Load 187
+             189:    9(fvec4) FwidthCoarse 188
+             190:    160(ptr) AccessChain 12 186
+                              Store 190 189
+             192:    160(ptr) AccessChain 12 159
+             193:    9(fvec4) Load 192
+             194:    9(fvec4) DPdxFine 193
+             195:    160(ptr) AccessChain 12 191
+                              Store 195 194
+             197:    160(ptr) AccessChain 12 166
+             198:    9(fvec4) Load 197
+             199:    9(fvec4) DPdyFine 198
+             200:    160(ptr) AccessChain 12 196
+                              Store 200 199
+             202:    160(ptr) AccessChain 12 159
+             203:    9(fvec4) Load 202
+             204:    9(fvec4) FwidthFine 203
+             205:    160(ptr) AccessChain 12 201
+                              Store 205 204
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.computeShaderDerivatives2.comp.out b/Test/baseResults/spv.computeShaderDerivatives2.comp.out
new file mode 100644 (file)
index 0000000..94a3dfc
--- /dev/null
@@ -0,0 +1,358 @@
+spv.computeShaderDerivatives2.comp
+// Module Version 10000
+// Generated by (magic number): 80007
+// Id's are bound by 212
+
+                              Capability Shader
+                              Capability DerivativeControl
+                              Capability ComputeDerivativeGroupLinearNV
+                              Extension  "SPV_NV_compute_shader_derivatives"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main"
+                              ExecutionMode 4 LocalSize 2 4 1
+                              ExecutionMode 4 DerivativeGroupLinearNV
+                              Source ESSL 320
+                              SourceExtension  "GL_NV_compute_shader_derivatives"
+                              Name 4  "main"
+                              Name 10  "block"
+                              MemberName 10(block) 0  "fDerivativeX"
+                              MemberName 10(block) 1  "fDerivativeY"
+                              MemberName 10(block) 2  "fDerivativeWidth"
+                              MemberName 10(block) 3  "fCoarseDerivativeX"
+                              MemberName 10(block) 4  "fCoarseDerivativeY"
+                              MemberName 10(block) 5  "fCoarseDerivativeWidth"
+                              MemberName 10(block) 6  "fFineDerivativeX"
+                              MemberName 10(block) 7  "fFineDerivativeY"
+                              MemberName 10(block) 8  "fFineDerivativeWidth"
+                              MemberName 10(block) 9  "fX"
+                              MemberName 10(block) 10  "fY"
+                              MemberName 10(block) 11  "v2DerivativeX"
+                              MemberName 10(block) 12  "v2DerivativeY"
+                              MemberName 10(block) 13  "v2DerivativeWidth"
+                              MemberName 10(block) 14  "v2CoarseDerivativeX"
+                              MemberName 10(block) 15  "v2CoarseDerivativeY"
+                              MemberName 10(block) 16  "v2CoarseDerivativeWidth"
+                              MemberName 10(block) 17  "v2FineDerivativeX"
+                              MemberName 10(block) 18  "v2FineDerivativeY"
+                              MemberName 10(block) 19  "v2FineDerivativeWidth"
+                              MemberName 10(block) 20  "v2X"
+                              MemberName 10(block) 21  "v2Y"
+                              MemberName 10(block) 22  "v3DerivativeX"
+                              MemberName 10(block) 23  "v3DerivativeY"
+                              MemberName 10(block) 24  "v3DerivativeWidth"
+                              MemberName 10(block) 25  "v3CoarseDerivativeX"
+                              MemberName 10(block) 26  "v3CoarseDerivativeY"
+                              MemberName 10(block) 27  "v3CoarseDerivativeWidth"
+                              MemberName 10(block) 28  "v3FineDerivativeX"
+                              MemberName 10(block) 29  "v3FineDerivativeY"
+                              MemberName 10(block) 30  "v3FineDerivativeWidth"
+                              MemberName 10(block) 31  "v3X"
+                              MemberName 10(block) 32  "v3Y"
+                              MemberName 10(block) 33  "v4DerivativeX"
+                              MemberName 10(block) 34  "v4DerivativeY"
+                              MemberName 10(block) 35  "v4DerivativeWidth"
+                              MemberName 10(block) 36  "v4CoarseDerivativeX"
+                              MemberName 10(block) 37  "v4CoarseDerivativeY"
+                              MemberName 10(block) 38  "v4CoarseDerivativeWidth"
+                              MemberName 10(block) 39  "v4FineDerivativeX"
+                              MemberName 10(block) 40  "v4FineDerivativeY"
+                              MemberName 10(block) 41  "v4FineDerivativeWidth"
+                              MemberName 10(block) 42  "v4X"
+                              MemberName 10(block) 43  "v4Y"
+                              Name 12  ""
+                              MemberDecorate 10(block) 0 Offset 0
+                              MemberDecorate 10(block) 1 Offset 4
+                              MemberDecorate 10(block) 2 Offset 8
+                              MemberDecorate 10(block) 3 Offset 12
+                              MemberDecorate 10(block) 4 Offset 16
+                              MemberDecorate 10(block) 5 Offset 20
+                              MemberDecorate 10(block) 6 Offset 24
+                              MemberDecorate 10(block) 7 Offset 28
+                              MemberDecorate 10(block) 8 Offset 32
+                              MemberDecorate 10(block) 9 Offset 36
+                              MemberDecorate 10(block) 10 Offset 40
+                              MemberDecorate 10(block) 11 Offset 48
+                              MemberDecorate 10(block) 12 Offset 56
+                              MemberDecorate 10(block) 13 Offset 64
+                              MemberDecorate 10(block) 14 Offset 72
+                              MemberDecorate 10(block) 15 Offset 80
+                              MemberDecorate 10(block) 16 Offset 88
+                              MemberDecorate 10(block) 17 Offset 96
+                              MemberDecorate 10(block) 18 Offset 104
+                              MemberDecorate 10(block) 19 Offset 112
+                              MemberDecorate 10(block) 20 Offset 120
+                              MemberDecorate 10(block) 21 Offset 128
+                              MemberDecorate 10(block) 22 Offset 144
+                              MemberDecorate 10(block) 23 Offset 160
+                              MemberDecorate 10(block) 24 Offset 176
+                              MemberDecorate 10(block) 25 Offset 192
+                              MemberDecorate 10(block) 26 Offset 208
+                              MemberDecorate 10(block) 27 Offset 224
+                              MemberDecorate 10(block) 28 Offset 240
+                              MemberDecorate 10(block) 29 Offset 256
+                              MemberDecorate 10(block) 30 Offset 272
+                              MemberDecorate 10(block) 31 Offset 288
+                              MemberDecorate 10(block) 32 Offset 304
+                              MemberDecorate 10(block) 33 Offset 320
+                              MemberDecorate 10(block) 34 Offset 336
+                              MemberDecorate 10(block) 35 Offset 352
+                              MemberDecorate 10(block) 36 Offset 368
+                              MemberDecorate 10(block) 37 Offset 384
+                              MemberDecorate 10(block) 38 Offset 400
+                              MemberDecorate 10(block) 39 Offset 416
+                              MemberDecorate 10(block) 40 Offset 432
+                              MemberDecorate 10(block) 41 Offset 448
+                              MemberDecorate 10(block) 42 Offset 464
+                              MemberDecorate 10(block) 43 Offset 480
+                              Decorate 10(block) BufferBlock
+                              Decorate 12 DescriptorSet 0
+                              Decorate 211 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 2
+               8:             TypeVector 6(float) 3
+               9:             TypeVector 6(float) 4
+       10(block):             TypeStruct 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 6(float) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 7(fvec2) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 8(fvec3) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4) 9(fvec4)
+              11:             TypePointer Uniform 10(block)
+              12:     11(ptr) Variable Uniform
+              13:             TypeInt 32 1
+              14:     13(int) Constant 0
+              15:     13(int) Constant 9
+              16:             TypePointer Uniform 6(float)
+              21:     13(int) Constant 1
+              22:     13(int) Constant 10
+              27:     13(int) Constant 2
+              32:     13(int) Constant 3
+              37:     13(int) Constant 4
+              42:     13(int) Constant 5
+              47:     13(int) Constant 6
+              52:     13(int) Constant 7
+              57:     13(int) Constant 8
+              62:     13(int) Constant 11
+              63:     13(int) Constant 20
+              64:             TypePointer Uniform 7(fvec2)
+              69:     13(int) Constant 12
+              70:     13(int) Constant 21
+              75:     13(int) Constant 13
+              80:     13(int) Constant 14
+              85:     13(int) Constant 15
+              90:     13(int) Constant 16
+              95:     13(int) Constant 17
+             100:     13(int) Constant 18
+             105:     13(int) Constant 19
+             110:     13(int) Constant 22
+             111:     13(int) Constant 31
+             112:             TypePointer Uniform 8(fvec3)
+             117:     13(int) Constant 23
+             118:     13(int) Constant 32
+             123:     13(int) Constant 24
+             128:     13(int) Constant 25
+             133:     13(int) Constant 26
+             138:     13(int) Constant 27
+             143:     13(int) Constant 28
+             148:     13(int) Constant 29
+             153:     13(int) Constant 30
+             158:     13(int) Constant 33
+             159:     13(int) Constant 42
+             160:             TypePointer Uniform 9(fvec4)
+             165:     13(int) Constant 34
+             166:     13(int) Constant 43
+             171:     13(int) Constant 35
+             176:     13(int) Constant 36
+             181:     13(int) Constant 37
+             186:     13(int) Constant 38
+             191:     13(int) Constant 39
+             196:     13(int) Constant 40
+             201:     13(int) Constant 41
+             206:             TypeInt 32 0
+             207:             TypeVector 206(int) 3
+             208:    206(int) Constant 2
+             209:    206(int) Constant 4
+             210:    206(int) Constant 1
+             211:  207(ivec3) ConstantComposite 208 209 210
+         4(main):           2 Function None 3
+               5:             Label
+              17:     16(ptr) AccessChain 12 15
+              18:    6(float) Load 17
+              19:    6(float) DPdx 18
+              20:     16(ptr) AccessChain 12 14
+                              Store 20 19
+              23:     16(ptr) AccessChain 12 22
+              24:    6(float) Load 23
+              25:    6(float) DPdy 24
+              26:     16(ptr) AccessChain 12 21
+                              Store 26 25
+              28:     16(ptr) AccessChain 12 15
+              29:    6(float) Load 28
+              30:    6(float) Fwidth 29
+              31:     16(ptr) AccessChain 12 27
+                              Store 31 30
+              33:     16(ptr) AccessChain 12 15
+              34:    6(float) Load 33
+              35:    6(float) DPdxCoarse 34
+              36:     16(ptr) AccessChain 12 32
+                              Store 36 35
+              38:     16(ptr) AccessChain 12 22
+              39:    6(float) Load 38
+              40:    6(float) DPdyCoarse 39
+              41:     16(ptr) AccessChain 12 37
+                              Store 41 40
+              43:     16(ptr) AccessChain 12 15
+              44:    6(float) Load 43
+              45:    6(float) FwidthCoarse 44
+              46:     16(ptr) AccessChain 12 42
+                              Store 46 45
+              48:     16(ptr) AccessChain 12 15
+              49:    6(float) Load 48
+              50:    6(float) DPdxFine 49
+              51:     16(ptr) AccessChain 12 47
+                              Store 51 50
+              53:     16(ptr) AccessChain 12 22
+              54:    6(float) Load 53
+              55:    6(float) DPdyFine 54
+              56:     16(ptr) AccessChain 12 52
+                              Store 56 55
+              58:     16(ptr) AccessChain 12 15
+              59:    6(float) Load 58
+              60:    6(float) FwidthFine 59
+              61:     16(ptr) AccessChain 12 57
+                              Store 61 60
+              65:     64(ptr) AccessChain 12 63
+              66:    7(fvec2) Load 65
+              67:    7(fvec2) DPdx 66
+              68:     64(ptr) AccessChain 12 62
+                              Store 68 67
+              71:     64(ptr) AccessChain 12 70
+              72:    7(fvec2) Load 71
+              73:    7(fvec2) DPdy 72
+              74:     64(ptr) AccessChain 12 69
+                              Store 74 73
+              76:     64(ptr) AccessChain 12 63
+              77:    7(fvec2) Load 76
+              78:    7(fvec2) Fwidth 77
+              79:     64(ptr) AccessChain 12 75
+                              Store 79 78
+              81:     64(ptr) AccessChain 12 63
+              82:    7(fvec2) Load 81
+              83:    7(fvec2) DPdxCoarse 82
+              84:     64(ptr) AccessChain 12 80
+                              Store 84 83
+              86:     64(ptr) AccessChain 12 70
+              87:    7(fvec2) Load 86
+              88:    7(fvec2) DPdyCoarse 87
+              89:     64(ptr) AccessChain 12 85
+                              Store 89 88
+              91:     64(ptr) AccessChain 12 63
+              92:    7(fvec2) Load 91
+              93:    7(fvec2) FwidthCoarse 92
+              94:     64(ptr) AccessChain 12 90
+                              Store 94 93
+              96:     64(ptr) AccessChain 12 63
+              97:    7(fvec2) Load 96
+              98:    7(fvec2) DPdxFine 97
+              99:     64(ptr) AccessChain 12 95
+                              Store 99 98
+             101:     64(ptr) AccessChain 12 70
+             102:    7(fvec2) Load 101
+             103:    7(fvec2) DPdyFine 102
+             104:     64(ptr) AccessChain 12 100
+                              Store 104 103
+             106:     64(ptr) AccessChain 12 63
+             107:    7(fvec2) Load 106
+             108:    7(fvec2) FwidthFine 107
+             109:     64(ptr) AccessChain 12 105
+                              Store 109 108
+             113:    112(ptr) AccessChain 12 111
+             114:    8(fvec3) Load 113
+             115:    8(fvec3) DPdx 114
+             116:    112(ptr) AccessChain 12 110
+                              Store 116 115
+             119:    112(ptr) AccessChain 12 118
+             120:    8(fvec3) Load 119
+             121:    8(fvec3) DPdy 120
+             122:    112(ptr) AccessChain 12 117
+                              Store 122 121
+             124:    112(ptr) AccessChain 12 111
+             125:    8(fvec3) Load 124
+             126:    8(fvec3) Fwidth 125
+             127:    112(ptr) AccessChain 12 123
+                              Store 127 126
+             129:    112(ptr) AccessChain 12 111
+             130:    8(fvec3) Load 129
+             131:    8(fvec3) DPdxCoarse 130
+             132:    112(ptr) AccessChain 12 128
+                              Store 132 131
+             134:    112(ptr) AccessChain 12 118
+             135:    8(fvec3) Load 134
+             136:    8(fvec3) DPdyCoarse 135
+             137:    112(ptr) AccessChain 12 133
+                              Store 137 136
+             139:    112(ptr) AccessChain 12 111
+             140:    8(fvec3) Load 139
+             141:    8(fvec3) FwidthCoarse 140
+             142:    112(ptr) AccessChain 12 138
+                              Store 142 141
+             144:    112(ptr) AccessChain 12 111
+             145:    8(fvec3) Load 144
+             146:    8(fvec3) DPdxFine 145
+             147:    112(ptr) AccessChain 12 143
+                              Store 147 146
+             149:    112(ptr) AccessChain 12 118
+             150:    8(fvec3) Load 149
+             151:    8(fvec3) DPdyFine 150
+             152:    112(ptr) AccessChain 12 148
+                              Store 152 151
+             154:    112(ptr) AccessChain 12 111
+             155:    8(fvec3) Load 154
+             156:    8(fvec3) FwidthFine 155
+             157:    112(ptr) AccessChain 12 153
+                              Store 157 156
+             161:    160(ptr) AccessChain 12 159
+             162:    9(fvec4) Load 161
+             163:    9(fvec4) DPdx 162
+             164:    160(ptr) AccessChain 12 158
+                              Store 164 163
+             167:    160(ptr) AccessChain 12 166
+             168:    9(fvec4) Load 167
+             169:    9(fvec4) DPdy 168
+             170:    160(ptr) AccessChain 12 165
+                              Store 170 169
+             172:    160(ptr) AccessChain 12 159
+             173:    9(fvec4) Load 172
+             174:    9(fvec4) Fwidth 173
+             175:    160(ptr) AccessChain 12 171
+                              Store 175 174
+             177:    160(ptr) AccessChain 12 159
+             178:    9(fvec4) Load 177
+             179:    9(fvec4) DPdxCoarse 178
+             180:    160(ptr) AccessChain 12 176
+                              Store 180 179
+             182:    160(ptr) AccessChain 12 166
+             183:    9(fvec4) Load 182
+             184:    9(fvec4) DPdyCoarse 183
+             185:    160(ptr) AccessChain 12 181
+                              Store 185 184
+             187:    160(ptr) AccessChain 12 159
+             188:    9(fvec4) Load 187
+             189:    9(fvec4) FwidthCoarse 188
+             190:    160(ptr) AccessChain 12 186
+                              Store 190 189
+             192:    160(ptr) AccessChain 12 159
+             193:    9(fvec4) Load 192
+             194:    9(fvec4) DPdxFine 193
+             195:    160(ptr) AccessChain 12 191
+                              Store 195 194
+             197:    160(ptr) AccessChain 12 166
+             198:    9(fvec4) Load 197
+             199:    9(fvec4) DPdyFine 198
+             200:    160(ptr) AccessChain 12 196
+                              Store 200 199
+             202:    160(ptr) AccessChain 12 159
+             203:    9(fvec4) Load 202
+             204:    9(fvec4) FwidthFine 203
+             205:    160(ptr) AccessChain 12 201
+                              Store 205 204
+                              Return
+                              FunctionEnd
diff --git a/Test/spv.computeShaderDerivatives.comp b/Test/spv.computeShaderDerivatives.comp
new file mode 100644 (file)
index 0000000..884f14d
--- /dev/null
@@ -0,0 +1,106 @@
+#version 450
+#extension GL_NV_compute_shader_derivatives : require
+
+layout (local_size_x = 2, local_size_y = 4) in;
+layout(derivative_group_quadsNV) in;
+
+buffer block {
+  float fDerivativeX;
+  float fDerivativeY;
+  float fDerivativeWidth;
+  float fCoarseDerivativeX;
+  float fCoarseDerivativeY;
+  float fCoarseDerivativeWidth;
+  float fFineDerivativeX;
+  float fFineDerivativeY;
+  float fFineDerivativeWidth;
+  
+  float fX;
+  float fY;
+  
+  
+  vec2 v2DerivativeX;
+  vec2 v2DerivativeY;
+  vec2 v2DerivativeWidth;
+  vec2 v2CoarseDerivativeX;
+  vec2 v2CoarseDerivativeY;
+  vec2 v2CoarseDerivativeWidth;
+  vec2 v2FineDerivativeX;
+  vec2 v2FineDerivativeY;
+  vec2 v2FineDerivativeWidth;
+  
+  vec2 v2X;
+  vec2 v2Y;
+  
+  
+  vec3 v3DerivativeX;
+  vec3 v3DerivativeY;
+  vec3 v3DerivativeWidth;
+  vec3 v3CoarseDerivativeX;
+  vec3 v3CoarseDerivativeY;
+  vec3 v3CoarseDerivativeWidth;
+  vec3 v3FineDerivativeX;
+  vec3 v3FineDerivativeY;
+  vec3 v3FineDerivativeWidth;
+  
+  vec3 v3X;
+  vec3 v3Y;
+  
+  
+  vec4 v4DerivativeX;
+  vec4 v4DerivativeY;
+  vec4 v4DerivativeWidth;
+  vec4 v4CoarseDerivativeX;
+  vec4 v4CoarseDerivativeY;
+  vec4 v4CoarseDerivativeWidth;
+  vec4 v4FineDerivativeX;
+  vec4 v4FineDerivativeY;
+  vec4 v4FineDerivativeWidth;
+  
+  vec4 v4X;
+  vec4 v4Y;
+};
+
+void main(){
+    fDerivativeX = dFdx(fX);
+    fDerivativeY = dFdy(fY);
+    fDerivativeWidth = fwidth(fX);
+    fCoarseDerivativeX = dFdxCoarse(fX);
+    fCoarseDerivativeY = dFdyCoarse(fY);
+    fCoarseDerivativeWidth = fwidthCoarse(fX);
+    fFineDerivativeX = dFdxFine(fX);
+    fFineDerivativeY = dFdyFine(fY);
+    fFineDerivativeWidth = fwidthFine(fX);
+
+    v2DerivativeX = dFdx(v2X);
+    v2DerivativeY = dFdy(v2Y);
+    v2DerivativeWidth = fwidth(v2X);
+    v2CoarseDerivativeX = dFdxCoarse(v2X);
+    v2CoarseDerivativeY = dFdyCoarse(v2Y);
+    v2CoarseDerivativeWidth = fwidthCoarse(v2X);
+    v2FineDerivativeX = dFdxFine(v2X);
+    v2FineDerivativeY = dFdyFine(v2Y);
+    v2FineDerivativeWidth = fwidthFine(v2X);
+
+
+    v3DerivativeX = dFdx(v3X);
+    v3DerivativeY = dFdy(v3Y);
+    v3DerivativeWidth = fwidth(v3X);
+    v3CoarseDerivativeX = dFdxCoarse(v3X);
+    v3CoarseDerivativeY = dFdyCoarse(v3Y);
+    v3CoarseDerivativeWidth = fwidthCoarse(v3X);
+    v3FineDerivativeX = dFdxFine(v3X);
+    v3FineDerivativeY = dFdyFine(v3Y);
+    v3FineDerivativeWidth = fwidthFine(v3X);
+
+
+    v4DerivativeX = dFdx(v4X);
+    v4DerivativeY = dFdy(v4Y);
+    v4DerivativeWidth = fwidth(v4X);
+    v4CoarseDerivativeX = dFdxCoarse(v4X);
+    v4CoarseDerivativeY = dFdyCoarse(v4Y);
+    v4CoarseDerivativeWidth = fwidthCoarse(v4X);
+    v4FineDerivativeX = dFdxFine(v4X);
+    v4FineDerivativeY = dFdyFine(v4Y);
+    v4FineDerivativeWidth = fwidthFine(v4X);
+}
diff --git a/Test/spv.computeShaderDerivatives2.comp b/Test/spv.computeShaderDerivatives2.comp
new file mode 100644 (file)
index 0000000..f964fdd
--- /dev/null
@@ -0,0 +1,106 @@
+#version 320 es
+#extension GL_NV_compute_shader_derivatives : require
+
+layout (local_size_x = 2, local_size_y = 4) in;
+layout(derivative_group_linearNV) in;
+
+buffer block {
+  float fDerivativeX;
+  float fDerivativeY;
+  float fDerivativeWidth;
+  float fCoarseDerivativeX;
+  float fCoarseDerivativeY;
+  float fCoarseDerivativeWidth;
+  float fFineDerivativeX;
+  float fFineDerivativeY;
+  float fFineDerivativeWidth;
+  
+  float fX;
+  float fY;
+  
+  
+  vec2 v2DerivativeX;
+  vec2 v2DerivativeY;
+  vec2 v2DerivativeWidth;
+  vec2 v2CoarseDerivativeX;
+  vec2 v2CoarseDerivativeY;
+  vec2 v2CoarseDerivativeWidth;
+  vec2 v2FineDerivativeX;
+  vec2 v2FineDerivativeY;
+  vec2 v2FineDerivativeWidth;
+  
+  vec2 v2X;
+  vec2 v2Y;
+  
+  
+  vec3 v3DerivativeX;
+  vec3 v3DerivativeY;
+  vec3 v3DerivativeWidth;
+  vec3 v3CoarseDerivativeX;
+  vec3 v3CoarseDerivativeY;
+  vec3 v3CoarseDerivativeWidth;
+  vec3 v3FineDerivativeX;
+  vec3 v3FineDerivativeY;
+  vec3 v3FineDerivativeWidth;
+  
+  vec3 v3X;
+  vec3 v3Y;
+  
+  
+  vec4 v4DerivativeX;
+  vec4 v4DerivativeY;
+  vec4 v4DerivativeWidth;
+  vec4 v4CoarseDerivativeX;
+  vec4 v4CoarseDerivativeY;
+  vec4 v4CoarseDerivativeWidth;
+  vec4 v4FineDerivativeX;
+  vec4 v4FineDerivativeY;
+  vec4 v4FineDerivativeWidth;
+  
+  vec4 v4X;
+  vec4 v4Y;
+};
+
+void main(){
+    fDerivativeX = dFdx(fX);
+    fDerivativeY = dFdy(fY);
+    fDerivativeWidth = fwidth(fX);
+    fCoarseDerivativeX = dFdxCoarse(fX);
+    fCoarseDerivativeY = dFdyCoarse(fY);
+    fCoarseDerivativeWidth = fwidthCoarse(fX);
+    fFineDerivativeX = dFdxFine(fX);
+    fFineDerivativeY = dFdyFine(fY);
+    fFineDerivativeWidth = fwidthFine(fX);
+
+    v2DerivativeX = dFdx(v2X);
+    v2DerivativeY = dFdy(v2Y);
+    v2DerivativeWidth = fwidth(v2X);
+    v2CoarseDerivativeX = dFdxCoarse(v2X);
+    v2CoarseDerivativeY = dFdyCoarse(v2Y);
+    v2CoarseDerivativeWidth = fwidthCoarse(v2X);
+    v2FineDerivativeX = dFdxFine(v2X);
+    v2FineDerivativeY = dFdyFine(v2Y);
+    v2FineDerivativeWidth = fwidthFine(v2X);
+
+
+    v3DerivativeX = dFdx(v3X);
+    v3DerivativeY = dFdy(v3Y);
+    v3DerivativeWidth = fwidth(v3X);
+    v3CoarseDerivativeX = dFdxCoarse(v3X);
+    v3CoarseDerivativeY = dFdyCoarse(v3Y);
+    v3CoarseDerivativeWidth = fwidthCoarse(v3X);
+    v3FineDerivativeX = dFdxFine(v3X);
+    v3FineDerivativeY = dFdyFine(v3Y);
+    v3FineDerivativeWidth = fwidthFine(v3X);
+
+
+    v4DerivativeX = dFdx(v4X);
+    v4DerivativeY = dFdy(v4Y);
+    v4DerivativeWidth = fwidth(v4X);
+    v4CoarseDerivativeX = dFdxCoarse(v4X);
+    v4CoarseDerivativeY = dFdyCoarse(v4Y);
+    v4CoarseDerivativeWidth = fwidthCoarse(v4X);
+    v4FineDerivativeX = dFdxFine(v4X);
+    v4FineDerivativeY = dFdyFine(v4Y);
+    v4FineDerivativeWidth = fwidthFine(v4X);
+}
index 1e2ce65..0e7d354 100644 (file)
@@ -1066,7 +1066,9 @@ struct TShaderQualifiers {
     int numViews;             // multiview extenstions
 
 #ifdef NV_EXTENSIONS
-    bool layoutOverrideCoverage;    // true if layout override_coverage set
+    bool layoutOverrideCoverage;        // true if layout override_coverage set
+    bool layoutDerivativeGroupQuads;    // true if layout derivative_group_quadsNV set
+    bool layoutDerivativeGroupLinear;   // true if layout derivative_group_linearNV set
 #endif
 
     void init()
@@ -1092,6 +1094,9 @@ struct TShaderQualifiers {
         numViews = TQualifier::layoutNotSet;
 #ifdef NV_EXTENSIONS
         layoutOverrideCoverage = false;
+        layoutDerivativeGroupQuads = false;
+        layoutDerivativeGroupLinear = false;
+
 #endif
     }
 
@@ -1136,6 +1141,10 @@ struct TShaderQualifiers {
 #ifdef NV_EXTENSIONS
         if (src.layoutOverrideCoverage)
             layoutOverrideCoverage = src.layoutOverrideCoverage;
+        if (src.layoutDerivativeGroupQuads)
+            layoutDerivativeGroupQuads = src.layoutDerivativeGroupQuads;
+        if (src.layoutDerivativeGroupLinear)
+            layoutDerivativeGroupLinear = src.layoutDerivativeGroupLinear;
 #endif
     }
 };
index 71e82f6..becce84 100644 (file)
@@ -124,6 +124,158 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
 {
     //============================================================================
     //
+    // Prototypes for built-in functions used repeatedly by different shaders
+    //
+    //============================================================================
+
+    //
+    // Derivatives Functions.
+    //
+    TString derivatives (
+        "float dFdx(float p);"
+        "vec2  dFdx(vec2  p);"
+        "vec3  dFdx(vec3  p);"
+        "vec4  dFdx(vec4  p);"
+
+        "float dFdy(float p);"
+        "vec2  dFdy(vec2  p);"
+        "vec3  dFdy(vec3  p);"
+        "vec4  dFdy(vec4  p);"
+
+        "float fwidth(float p);"
+        "vec2  fwidth(vec2  p);"
+        "vec3  fwidth(vec3  p);"
+        "vec4  fwidth(vec4  p);"
+    );
+
+    TString derivativeControls (
+        "float dFdxFine(float p);"
+        "vec2  dFdxFine(vec2  p);"
+        "vec3  dFdxFine(vec3  p);"
+        "vec4  dFdxFine(vec4  p);"
+
+        "float dFdyFine(float p);"
+        "vec2  dFdyFine(vec2  p);"
+        "vec3  dFdyFine(vec3  p);"
+        "vec4  dFdyFine(vec4  p);"
+
+        "float fwidthFine(float p);"
+        "vec2  fwidthFine(vec2  p);"
+        "vec3  fwidthFine(vec3  p);"
+        "vec4  fwidthFine(vec4  p);"
+
+        "float dFdxCoarse(float p);"
+        "vec2  dFdxCoarse(vec2  p);"
+        "vec3  dFdxCoarse(vec3  p);"
+        "vec4  dFdxCoarse(vec4  p);"
+
+        "float dFdyCoarse(float p);"
+        "vec2  dFdyCoarse(vec2  p);"
+        "vec3  dFdyCoarse(vec3  p);"
+        "vec4  dFdyCoarse(vec4  p);"
+
+        "float fwidthCoarse(float p);"
+        "vec2  fwidthCoarse(vec2  p);"
+        "vec3  fwidthCoarse(vec3  p);"
+        "vec4  fwidthCoarse(vec4  p);"
+    );
+
+    TString derivativesAndControl16bits (
+        "float16_t dFdx(float16_t);"
+        "f16vec2   dFdx(f16vec2);"
+        "f16vec3   dFdx(f16vec3);"
+        "f16vec4   dFdx(f16vec4);"
+
+        "float16_t dFdy(float16_t);"
+        "f16vec2   dFdy(f16vec2);"
+        "f16vec3   dFdy(f16vec3);"
+        "f16vec4   dFdy(f16vec4);"
+
+        "float16_t dFdxFine(float16_t);"
+        "f16vec2   dFdxFine(f16vec2);"
+        "f16vec3   dFdxFine(f16vec3);"
+        "f16vec4   dFdxFine(f16vec4);"
+
+        "float16_t dFdyFine(float16_t);"
+        "f16vec2   dFdyFine(f16vec2);"
+        "f16vec3   dFdyFine(f16vec3);"
+        "f16vec4   dFdyFine(f16vec4);"
+
+        "float16_t dFdxCoarse(float16_t);"
+        "f16vec2   dFdxCoarse(f16vec2);"
+        "f16vec3   dFdxCoarse(f16vec3);"
+        "f16vec4   dFdxCoarse(f16vec4);"
+
+        "float16_t dFdyCoarse(float16_t);"
+        "f16vec2   dFdyCoarse(f16vec2);"
+        "f16vec3   dFdyCoarse(f16vec3);"
+        "f16vec4   dFdyCoarse(f16vec4);"
+
+        "float16_t fwidth(float16_t);"
+        "f16vec2   fwidth(f16vec2);"
+        "f16vec3   fwidth(f16vec3);"
+        "f16vec4   fwidth(f16vec4);"
+
+        "float16_t fwidthFine(float16_t);"
+        "f16vec2   fwidthFine(f16vec2);"
+        "f16vec3   fwidthFine(f16vec3);"
+        "f16vec4   fwidthFine(f16vec4);"
+
+        "float16_t fwidthCoarse(float16_t);"
+        "f16vec2   fwidthCoarse(f16vec2);"
+        "f16vec3   fwidthCoarse(f16vec3);"
+        "f16vec4   fwidthCoarse(f16vec4);"
+    );
+
+    TString derivativesAndControl64bits (
+        "float64_t dFdx(float64_t);"
+        "f64vec2   dFdx(f64vec2);"
+        "f64vec3   dFdx(f64vec3);"
+        "f64vec4   dFdx(f64vec4);"
+
+        "float64_t dFdy(float64_t);"
+        "f64vec2   dFdy(f64vec2);"
+        "f64vec3   dFdy(f64vec3);"
+        "f64vec4   dFdy(f64vec4);"
+
+        "float64_t dFdxFine(float64_t);"
+        "f64vec2   dFdxFine(f64vec2);"
+        "f64vec3   dFdxFine(f64vec3);"
+        "f64vec4   dFdxFine(f64vec4);"
+
+        "float64_t dFdyFine(float64_t);"
+        "f64vec2   dFdyFine(f64vec2);"
+        "f64vec3   dFdyFine(f64vec3);"
+        "f64vec4   dFdyFine(f64vec4);"
+
+        "float64_t dFdxCoarse(float64_t);"
+        "f64vec2   dFdxCoarse(f64vec2);"
+        "f64vec3   dFdxCoarse(f64vec3);"
+        "f64vec4   dFdxCoarse(f64vec4);"
+
+        "float64_t dFdyCoarse(float64_t);"
+        "f64vec2   dFdyCoarse(f64vec2);"
+        "f64vec3   dFdyCoarse(f64vec3);"
+        "f64vec4   dFdyCoarse(f64vec4);"
+
+        "float64_t fwidth(float64_t);"
+        "f64vec2   fwidth(f64vec2);"
+        "f64vec3   fwidth(f64vec3);"
+        "f64vec4   fwidth(f64vec4);"
+
+        "float64_t fwidthFine(float64_t);"
+        "f64vec2   fwidthFine(f64vec2);"
+        "f64vec3   fwidthFine(f64vec3);"
+        "f64vec4   fwidthFine(f64vec4);"
+
+        "float64_t fwidthCoarse(float64_t);"
+        "f64vec2   fwidthCoarse(f64vec2);"
+        "f64vec3   fwidthCoarse(f64vec3);"
+        "f64vec4   fwidthCoarse(f64vec4);"
+    );
+
+    //============================================================================
+    //
     // Prototypes for built-in functions seen by both vertex and fragment shaders.
     //
     //============================================================================
@@ -4550,52 +4702,8 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "\n");
         }
         if (profile != EEsProfile && version >= 450) {
+            stageBuiltins[EShLangFragment].append(derivativesAndControl64bits);
             stageBuiltins[EShLangFragment].append(
-                "float64_t dFdx(float64_t);"
-                "f64vec2   dFdx(f64vec2);"
-                "f64vec3   dFdx(f64vec3);"
-                "f64vec4   dFdx(f64vec4);"
-
-                "float64_t dFdy(float64_t);"
-                "f64vec2   dFdy(f64vec2);"
-                "f64vec3   dFdy(f64vec3);"
-                "f64vec4   dFdy(f64vec4);"
-
-                "float64_t dFdxFine(float64_t);"
-                "f64vec2   dFdxFine(f64vec2);"
-                "f64vec3   dFdxFine(f64vec3);"
-                "f64vec4   dFdxFine(f64vec4);"
-
-                "float64_t dFdyFine(float64_t);"
-                "f64vec2   dFdyFine(f64vec2);"
-                "f64vec3   dFdyFine(f64vec3);"
-                "f64vec4   dFdyFine(f64vec4);"
-
-                "float64_t dFdxCoarse(float64_t);"
-                "f64vec2   dFdxCoarse(f64vec2);"
-                "f64vec3   dFdxCoarse(f64vec3);"
-                "f64vec4   dFdxCoarse(f64vec4);"
-
-                "float64_t dFdyCoarse(float64_t);"
-                "f64vec2   dFdyCoarse(f64vec2);"
-                "f64vec3   dFdyCoarse(f64vec3);"
-                "f64vec4   dFdyCoarse(f64vec4);"
-
-                "float64_t fwidth(float64_t);"
-                "f64vec2   fwidth(f64vec2);"
-                "f64vec3   fwidth(f64vec3);"
-                "f64vec4   fwidth(f64vec4);"
-
-                "float64_t fwidthFine(float64_t);"
-                "f64vec2   fwidthFine(f64vec2);"
-                "f64vec3   fwidthFine(f64vec3);"
-                "f64vec4   fwidthFine(f64vec4);"
-
-                "float64_t fwidthCoarse(float64_t);"
-                "f64vec2   fwidthCoarse(f64vec2);"
-                "f64vec3   fwidthCoarse(f64vec3);"
-                "f64vec4   fwidthCoarse(f64vec4);"
-
                 "float64_t interpolateAtCentroid(float64_t);"
                 "f64vec2   interpolateAtCentroid(f64vec2);"
                 "f64vec3   interpolateAtCentroid(f64vec3);"
@@ -4784,61 +4892,13 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
             "\n");
     }
 
-    stageBuiltins[EShLangFragment].append(
-        "float dFdx(float p);"
-        "vec2  dFdx(vec2  p);"
-        "vec3  dFdx(vec3  p);"
-        "vec4  dFdx(vec4  p);"
-
-        "float dFdy(float p);"
-        "vec2  dFdy(vec2  p);"
-        "vec3  dFdy(vec3  p);"
-        "vec4  dFdy(vec4  p);"
-
-        "float fwidth(float p);"
-        "vec2  fwidth(vec2  p);"
-        "vec3  fwidth(vec3  p);"
-        "vec4  fwidth(vec4  p);"
-
-        "\n");
+    stageBuiltins[EShLangFragment].append(derivatives);
+    stageBuiltins[EShLangFragment].append("\n");
 
     // GL_ARB_derivative_control
     if (profile != EEsProfile && version >= 400) {
-        stageBuiltins[EShLangFragment].append(
-            "float dFdxFine(float p);"
-            "vec2  dFdxFine(vec2  p);"
-            "vec3  dFdxFine(vec3  p);"
-            "vec4  dFdxFine(vec4  p);"
-
-            "float dFdyFine(float p);"
-            "vec2  dFdyFine(vec2  p);"
-            "vec3  dFdyFine(vec3  p);"
-            "vec4  dFdyFine(vec4  p);"
-
-            "float fwidthFine(float p);"
-            "vec2  fwidthFine(vec2  p);"
-            "vec3  fwidthFine(vec3  p);"
-            "vec4  fwidthFine(vec4  p);"
-
-            "\n");
-
-        stageBuiltins[EShLangFragment].append(
-            "float dFdxCoarse(float p);"
-            "vec2  dFdxCoarse(vec2  p);"
-            "vec3  dFdxCoarse(vec3  p);"
-            "vec4  dFdxCoarse(vec4  p);"
-
-            "float dFdyCoarse(float p);"
-            "vec2  dFdyCoarse(vec2  p);"
-            "vec3  dFdyCoarse(vec3  p);"
-            "vec4  dFdyCoarse(vec4  p);"
-
-            "float fwidthCoarse(float p);"
-            "vec2  fwidthCoarse(vec2  p);"
-            "vec3  fwidthCoarse(vec3  p);"
-            "vec4  fwidthCoarse(vec4  p);"
-
-            "\n");
+        stageBuiltins[EShLangFragment].append(derivativeControls);
+        stageBuiltins[EShLangFragment].append("\n");
     }
 
     // GL_OES_shader_multisample_interpolation
@@ -4892,52 +4952,10 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
 
     // GL_AMD_gpu_shader_half_float
     if (profile != EEsProfile && version >= 450) {
-        stageBuiltins[EShLangFragment].append(
-            "float16_t dFdx(float16_t);"
-            "f16vec2   dFdx(f16vec2);"
-            "f16vec3   dFdx(f16vec3);"
-            "f16vec4   dFdx(f16vec4);"
-
-            "float16_t dFdy(float16_t);"
-            "f16vec2   dFdy(f16vec2);"
-            "f16vec3   dFdy(f16vec3);"
-            "f16vec4   dFdy(f16vec4);"
-
-            "float16_t dFdxFine(float16_t);"
-            "f16vec2   dFdxFine(f16vec2);"
-            "f16vec3   dFdxFine(f16vec3);"
-            "f16vec4   dFdxFine(f16vec4);"
-
-            "float16_t dFdyFine(float16_t);"
-            "f16vec2   dFdyFine(f16vec2);"
-            "f16vec3   dFdyFine(f16vec3);"
-            "f16vec4   dFdyFine(f16vec4);"
-
-            "float16_t dFdxCoarse(float16_t);"
-            "f16vec2   dFdxCoarse(f16vec2);"
-            "f16vec3   dFdxCoarse(f16vec3);"
-            "f16vec4   dFdxCoarse(f16vec4);"
-
-            "float16_t dFdyCoarse(float16_t);"
-            "f16vec2   dFdyCoarse(f16vec2);"
-            "f16vec3   dFdyCoarse(f16vec3);"
-            "f16vec4   dFdyCoarse(f16vec4);"
-
-            "float16_t fwidth(float16_t);"
-            "f16vec2   fwidth(f16vec2);"
-            "f16vec3   fwidth(f16vec3);"
-            "f16vec4   fwidth(f16vec4);"
-
-            "float16_t fwidthFine(float16_t);"
-            "f16vec2   fwidthFine(f16vec2);"
-            "f16vec3   fwidthFine(f16vec3);"
-            "f16vec4   fwidthFine(f16vec4);"
-
-            "float16_t fwidthCoarse(float16_t);"
-            "f16vec2   fwidthCoarse(f16vec2);"
-            "f16vec3   fwidthCoarse(f16vec3);"
-            "f16vec4   fwidthCoarse(f16vec4);"
+        stageBuiltins[EShLangFragment].append(derivativesAndControl16bits);
+        stageBuiltins[EShLangFragment].append("\n");
 
+        stageBuiltins[EShLangFragment].append(
             "float16_t interpolateAtCentroid(float16_t);"
             "f16vec2   interpolateAtCentroid(f16vec2);"
             "f16vec3   interpolateAtCentroid(f16vec3);"
@@ -4971,6 +4989,22 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
         }
 #endif
 
+#ifdef NV_EXTENSIONS
+    // GL_NV_compute_shader_derivatives: derivative built-ins made available to compute shaders
+    
+    stageBuiltins[EShLangCompute].append(derivatives);
+    stageBuiltins[EShLangCompute].append(derivativeControls);
+    stageBuiltins[EShLangCompute].append("\n");
+    
+
+    if (profile != EEsProfile && version >= 450) {
+
+        stageBuiltins[EShLangCompute].append(derivativesAndControl16bits);
+        stageBuiltins[EShLangCompute].append(derivativesAndControl64bits);
+        stageBuiltins[EShLangCompute].append("\n");
+    }
+#endif
+
     //============================================================================
     //
     // Standard Uniforms
@@ -6114,6 +6148,18 @@ void TBuiltIns::addQueryFunctions(TSampler sampler, const TString& typeName, int
 #ifdef AMD_EXTENSIONS
         }
 #endif
+
+#ifdef NV_EXTENSIONS
+        stageBuiltins[EShLangCompute].append("vec2 textureQueryLod(");
+        stageBuiltins[EShLangCompute].append(typeName);
+        if (dimMap[sampler.dim] == 1)
+            stageBuiltins[EShLangCompute].append(", float");
+        else {
+            stageBuiltins[EShLangCompute].append(", vec");
+            stageBuiltins[EShLangCompute].append(postfixes[dimMap[sampler.dim]]);
+        }
+        stageBuiltins[EShLangCompute].append(");\n");
+#endif
     }
 
     //
@@ -6594,9 +6640,12 @@ void TBuiltIns::addSamplingFunctions(TSampler sampler, const TString& typeName,
                                             s.append(");\n");
 
                                             // Add to the per-language set of built-ins
-                                            if (bias || lodClamp)
+                                            if (bias || lodClamp) {
                                                 stageBuiltins[EShLangFragment].append(s);
-                                            else
+#ifdef NV_EXTENSIONS
+                                                stageBuiltins[EShLangCompute].append(s);
+#endif
+                                            } else
                                                 commonBuiltins.append(s);
 
                                         }
@@ -7923,6 +7972,19 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
             BuiltInVariable("gl_BaryCoordNV",        EbvBaryCoordNV,        symbolTable);
             BuiltInVariable("gl_BaryCoordNoPerspNV", EbvBaryCoordNoPerspNV, symbolTable);
         }
+        if (((profile != EEsProfile && version >= 450) ||
+            (profile == EEsProfile && version >= 320)) &&
+            language == EShLangCompute) {
+            symbolTable.setFunctionExtensions("dFdx",                   1, &E_GL_NV_compute_shader_derivatives);
+            symbolTable.setFunctionExtensions("dFdy",                   1, &E_GL_NV_compute_shader_derivatives);
+            symbolTable.setFunctionExtensions("fwidth",                 1, &E_GL_NV_compute_shader_derivatives);
+            symbolTable.setFunctionExtensions("dFdxFine",               1, &E_GL_NV_compute_shader_derivatives);
+            symbolTable.setFunctionExtensions("dFdyFine",               1, &E_GL_NV_compute_shader_derivatives);
+            symbolTable.setFunctionExtensions("fwidthFine",             1, &E_GL_NV_compute_shader_derivatives);
+            symbolTable.setFunctionExtensions("dFdxCoarse",             1, &E_GL_NV_compute_shader_derivatives);
+            symbolTable.setFunctionExtensions("dFdyCoarse",             1, &E_GL_NV_compute_shader_derivatives);
+            symbolTable.setFunctionExtensions("fwidthCoarse",           1, &E_GL_NV_compute_shader_derivatives);
+        }
 #endif
 
         symbolTable.setVariableExtensions("gl_FragDepthEXT", 1, &E_GL_EXT_frag_depth);
@@ -8711,6 +8773,20 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
         symbolTable.relateToOperator("memoryBarrierShared",         EOpMemoryBarrierShared);
         symbolTable.relateToOperator("groupMemoryBarrier",          EOpGroupMemoryBarrier);
         symbolTable.relateToOperator("subgroupMemoryBarrierShared", EOpSubgroupMemoryBarrierShared);
+#ifdef NV_EXTENSIONS
+        if ((profile != EEsProfile && version >= 450) ||
+            (profile == EEsProfile && version >= 320)) {
+            symbolTable.relateToOperator("dFdx",        EOpDPdx);
+            symbolTable.relateToOperator("dFdy",        EOpDPdy);
+            symbolTable.relateToOperator("fwidth",      EOpFwidth);
+            symbolTable.relateToOperator("dFdxFine",    EOpDPdxFine);
+            symbolTable.relateToOperator("dFdyFine",    EOpDPdyFine);
+            symbolTable.relateToOperator("fwidthFine",  EOpFwidthFine);
+            symbolTable.relateToOperator("dFdxCoarse",  EOpDPdxCoarse);
+            symbolTable.relateToOperator("dFdyCoarse",  EOpDPdyCoarse);
+            symbolTable.relateToOperator("fwidthCoarse",EOpFwidthCoarse);
+        }
+#endif
         break;
 
     default:
index 8ab327d..3bcf537 100644 (file)
@@ -4623,6 +4623,18 @@ void TParseContext::setLayoutQualifier(const TSourceLoc& loc, TPublicType& publi
             return;
         }
     }
+    if (language == EShLangCompute) {
+        if (id.compare(0, 17, "derivative_group_") == 0) {
+            requireExtensions(loc, 1, &E_GL_NV_compute_shader_derivatives, "compute shader derivatives");
+            if (id == "derivative_group_quadsnv") {
+                publicType.shaderQualifiers.layoutDerivativeGroupQuads = true;
+                return;
+            } else if (id == "derivative_group_linearnv") {
+                publicType.shaderQualifiers.layoutDerivativeGroupLinear = true;
+                return;
+            }
+        }
+    }
 #else
     }
 #endif
@@ -7027,6 +7039,36 @@ void TParseContext::updateStandaloneQualifierDefaults(const TSourceLoc& loc, con
             error(loc, "can only apply to 'out'", "blend equation", "");
     }
 
+#ifdef NV_EXTENSIONS
+    if (publicType.shaderQualifiers.layoutDerivativeGroupQuads &&
+        publicType.shaderQualifiers.layoutDerivativeGroupLinear) {
+        error(loc, "cannot be both specified", "derivative_group_quadsNV and derivative_group_linearNV", "");
+    }
+
+    if (publicType.shaderQualifiers.layoutDerivativeGroupQuads) {
+        if (publicType.qualifier.storage == EvqVaryingIn) {
+            if ((intermediate.getLocalSize(0) & 1) ||
+                (intermediate.getLocalSize(1) & 1))
+                error(loc, "requires local_size_x and local_size_y to be multiple of two", "derivative_group_quadsNV", "");
+            else
+                intermediate.setLayoutDerivativeMode(LayoutDerivativeGroupQuads);
+        }
+        else
+            error(loc, "can only apply to 'in'", "derivative_group_quadsNV", "");
+    }
+    if (publicType.shaderQualifiers.layoutDerivativeGroupLinear) {
+        if (publicType.qualifier.storage == EvqVaryingIn) {
+            if((intermediate.getLocalSize(0) *
+                intermediate.getLocalSize(1) *
+                intermediate.getLocalSize(2)) % 4 != 0)
+                error(loc, "requires total group size to be multiple of four", "derivative_group_linearNV", "");
+            else
+                intermediate.setLayoutDerivativeMode(LayoutDerivativeGroupLinear);
+        }
+        else
+            error(loc, "can only apply to 'in'", "derivative_group_linearNV", "");
+    }
+#endif 
     const TQualifier& qualifier = publicType.qualifier;
 
     if (qualifier.isAuxiliary() ||
index ee74aec..0221288 100644 (file)
@@ -236,6 +236,7 @@ void TParseVersions::initializeExtensionBehavior()
     extensionBehavior[E_GL_NV_shader_noperspective_interpolation]    = EBhDisable;
     extensionBehavior[E_GL_NV_shader_subgroup_partitioned]           = EBhDisable;
     extensionBehavior[E_GL_NV_fragment_shader_barycentric]           = EBhDisable;
+    extensionBehavior[E_GL_NV_compute_shader_derivatives]            = EBhDisable;
 #endif
 
     // AEP
@@ -407,6 +408,7 @@ void TParseVersions::getPreamble(std::string& preamble)
             "#define GL_NV_conservative_raster_underestimation 1\n"
             "#define GL_NV_shader_subgroup_partitioned 1\n"
             "#define GL_NV_fragment_shader_barycentric 1\n"
+            "#define GL_NV_compute_shader_derivatives 1\n"
 #endif
             "#define GL_KHX_shader_explicit_arithmetic_types 1\n"
             "#define GL_KHX_shader_explicit_arithmetic_types_int8 1\n"
index 9efb242..2102c25 100644 (file)
@@ -208,6 +208,7 @@ const char* const E_GL_NV_conservative_raster_underestimation   = "GL_NV_conserv
 const char* const E_GL_NV_shader_noperspective_interpolation    = "GL_NV_shader_noperspective_interpolation";
 const char* const E_GL_NV_shader_subgroup_partitioned           = "GL_NV_shader_subgroup_partitioned";
 const char* const E_GL_NV_fragment_shader_barycentric           = "GL_NV_fragment_shader_barycentric";
+const char* const E_GL_NV_compute_shader_derivatives            = "GL_NV_compute_shader_derivatives";
 // Arrays of extensions for the above viewportEXTs duplications
 
 const char* const viewportEXTs[] = { E_GL_ARB_shader_viewport_layer_array, E_GL_NV_viewport_array2 };
index e9fe0e4..d5ee86e 100644 (file)
@@ -206,6 +206,17 @@ class TSymbolTable;
 class TSymbol;
 class TVariable;
 
+#ifdef NV_EXTENSIONS
+//
+// Compute shader derivative group layout mode (GL_NV_compute_shader_derivatives).
+//
+enum ComputeDerivativeMode {
+    LayoutDerivativeNone,         // default: SPV_NV_compute_shader_derivatives is not enabled
+    LayoutDerivativeGroupQuads,   // derivative_group_quadsNV
+    LayoutDerivativeGroupLinear,  // derivative_group_linearNV
+};
+#endif
+
 //
 // Set of helper functions to help parse and build the tree.
 //
@@ -225,6 +236,7 @@ public:
 #ifdef NV_EXTENSIONS
         layoutOverrideCoverage(false),
         geoPassthroughEXT(false),
+        computeDerivativeMode(LayoutDerivativeNone),
 #endif
         autoMapBindings(false),
         autoMapLocations(false),
@@ -622,6 +634,8 @@ public:
     bool getLayoutOverrideCoverage() const { return layoutOverrideCoverage; }
     void setGeoPassthroughEXT() { geoPassthroughEXT = true; }
     bool getGeoPassthroughEXT() const { return geoPassthroughEXT; }
+    void setLayoutDerivativeMode(ComputeDerivativeMode mode) { computeDerivativeMode = mode; }
+    ComputeDerivativeMode getLayoutDerivativeModeNone() const { return computeDerivativeMode; }
 #endif
 
     const char* addSemanticName(const TString& name)
@@ -725,6 +739,7 @@ protected:
 #ifdef NV_EXTENSIONS
     bool layoutOverrideCoverage;
     bool geoPassthroughEXT;
+    ComputeDerivativeMode computeDerivativeMode;
 #endif
 
     // Base shift values
index 69099dc..f860041 100644 (file)
@@ -503,6 +503,8 @@ INSTANTIATE_TEST_CASE_P(
     "spv.atomicInt64.comp",
     "spv.fragmentShaderBarycentric.frag",
     "spv.fragmentShaderBarycentric2.frag",
+    "spv.computeShaderDerivatives.comp",
+    "spv.computeShaderDerivatives2.comp",
 })),
 FileNameAsCustomTestSuffix
 );