MSL: Emit multiple threadgroup slices for multi-patch.
authorHans-Kristian Arntzen <post@arntzen-software.no>
Wed, 14 Apr 2021 11:13:13 +0000 (13:13 +0200)
committerHans-Kristian Arntzen <post@arntzen-software.no>
Mon, 19 Apr 2021 10:10:49 +0000 (12:10 +0200)
Multiple patches can run in the same workgroup when using multi-patch
mode, so we need to allocate enough storage to avoid false sharing.

36 files changed:
reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc [new file with mode: 0644]
reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc [new file with mode: 0644]
reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc [new file with mode: 0644]
reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc [new file with mode: 0644]
reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc [new file with mode: 0644]
reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc [new file with mode: 0644]
reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc [new file with mode: 0644]
reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc [new file with mode: 0644]
reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc [new file with mode: 0644]
reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc [new file with mode: 0644]
reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc [new file with mode: 0644]
reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc [new file with mode: 0644]
reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc [new file with mode: 0644]
reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc [new file with mode: 0644]
reference/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc [new file with mode: 0644]
reference/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc [new file with mode: 0644]
reference/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc [new file with mode: 0644]
reference/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc [new file with mode: 0644]
reference/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc [new file with mode: 0644]
reference/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc [new file with mode: 0644]
shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc [new file with mode: 0644]
shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc [new file with mode: 0644]
shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc [new file with mode: 0644]
shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc [new file with mode: 0644]
shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc [new file with mode: 0644]
shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc [new file with mode: 0644]
shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc [new file with mode: 0644]
shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc [new file with mode: 0644]
shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc [new file with mode: 0644]
shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc [new file with mode: 0644]
shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc [new file with mode: 0644]
shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc [new file with mode: 0644]
shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc [new file with mode: 0644]
shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc [new file with mode: 0644]
spirv_msl.cpp
spirv_msl.hpp

diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc
new file mode 100644 (file)
index 0000000..ca025cd
--- /dev/null
@@ -0,0 +1,44 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_a;
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_b;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    threadgroup P spvStorage_11[8];
+    threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    _11.a = 1.0;
+    patchOut.P_b = 2.0;
+    gl_out[gl_InvocationID].C_a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc
new file mode 100644 (file)
index 0000000..700e3fc
--- /dev/null
@@ -0,0 +1,44 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_a;
+    float P_b;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup C spvStoragec[8][4];
+    threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    patchOut.P_a = 1.0;
+    patchOut.P_b = 2.0;
+    c[gl_InvocationID].a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc
new file mode 100644 (file)
index 0000000..d20b7d7
--- /dev/null
@@ -0,0 +1,81 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    float4 gl_Position;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> v1;
+    float4 v3;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup float4 spvStoragev0[8][4];
+    threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    v0[gl_InvocationID] = float4(1.0);
+    v0[gl_InvocationID].z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        patchOut.v1[0] = float4(2.0);
+        ((device float*)&patchOut.v1[0])[0u] = 3.0;
+        patchOut.v1[1] = float4(2.0);
+        ((device float*)&patchOut.v1[1])[0u] = 5.0;
+    }
+    patchOut.v3 = float4(5.0);
+    gl_out[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc
new file mode 100644 (file)
index 0000000..2831008
--- /dev/null
@@ -0,0 +1,40 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 v0;
+    float4 gl_Position;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    float4 v3;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    threadgroup float4 spvStoragev1[8][2];
+    threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].v0 = float4(1.0);
+    gl_out[gl_InvocationID].v0.z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        v1[0] = float4(2.0);
+        ((threadgroup float*)&v1[0])[0u] = 3.0;
+        v1[1] = float4(2.0);
+        ((threadgroup float*)&v1[1])[0u] = 5.0;
+    }
+    patchOut.v3 = float4(5.0);
+    gl_out[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc
new file mode 100644 (file)
index 0000000..2136034
--- /dev/null
@@ -0,0 +1,89 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_out
+{
+    float4 v0;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> v1;
+    float4 v3;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].v0 = float4(1.0);
+    gl_out[gl_InvocationID].v0.z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        patchOut.v1[0] = float4(2.0);
+        ((device float*)&patchOut.v1[0])[0u] = 3.0;
+        patchOut.v1[1] = float4(2.0);
+        ((device float*)&patchOut.v1[1])[0u] = 5.0;
+    }
+    patchOut.v3 = float4(5.0);
+    gl_out[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out_masked[gl_InvocationID].gl_PointSize = 40.0;
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc
new file mode 100644 (file)
index 0000000..3aea579
--- /dev/null
@@ -0,0 +1,89 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_out
+{
+    float4 v0;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> v1;
+    float4 v3;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].v0 = float4(1.0);
+    gl_out[gl_InvocationID].v0.z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        patchOut.v1[0] = float4(2.0);
+        ((device float*)&patchOut.v1[0])[0u] = 3.0;
+        patchOut.v1[1] = float4(2.0);
+        ((device float*)&patchOut.v1[1])[0u] = 5.0;
+    }
+    patchOut.v3 = float4(5.0);
+    gl_out_masked[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out_masked[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc
new file mode 100644 (file)
index 0000000..aba207f
--- /dev/null
@@ -0,0 +1,85 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct C
+{
+    float4 v;
+};
+
+struct P
+{
+    float4 v;
+};
+
+struct main0_out
+{
+    float4 gl_Position;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    float4 P_v;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    spvUnsafeArray<C, 4> _18 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
+    
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup C spvStoragec[8][4];
+    threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
+    c[gl_InvocationID] = _18[gl_InvocationID];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    patchOut.P_v = float4(0.0);
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    c[gl_InvocationID].v = float4(1.0);
+    patchOut.P_v = float4(2.0);
+    gl_out[gl_InvocationID].gl_Position = float4(3.0);
+    gl_out[gl_InvocationID].gl_PointSize = 4.0;
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc
new file mode 100644 (file)
index 0000000..919cd25
--- /dev/null
@@ -0,0 +1,84 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct C
+{
+    float4 v;
+};
+
+struct P
+{
+    float4 v;
+};
+
+struct main0_out
+{
+    float4 C_v;
+    float4 gl_Position;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+};
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    spvUnsafeArray<C, 4> _18 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
+    
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    gl_out[gl_InvocationID].C_v = _18[gl_GlobalInvocationID].v;
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    threadgroup P spvStoragep[8];
+    threadgroup P (&p) = spvStoragep[(gl_GlobalInvocationID.x / 4) % 8];
+    p = P{ float4(0.0) };
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].C_v = float4(1.0);
+    p.v = float4(2.0);
+    gl_out[gl_InvocationID].gl_Position = float4(3.0);
+    gl_out[gl_InvocationID].gl_PointSize = 4.0;
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc
new file mode 100644 (file)
index 0000000..ce0644e
--- /dev/null
@@ -0,0 +1,103 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct C
+{
+    float4 v;
+};
+
+struct P
+{
+    float4 v;
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+constant spvUnsafeArray<float, 1> _51 = spvUnsafeArray<float, 1>({ 0.0 });
+constant spvUnsafeArray<float, 1> _52 = spvUnsafeArray<float, 1>({ 0.0 });
+
+struct main0_out
+{
+    float4 C_v;
+    float4 gl_Position;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_patchOut
+{
+    float4 P_v;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    spvUnsafeArray<C, 4> _18 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
+    spvUnsafeArray<gl_PerVertex, 4> _33 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
+    
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    gl_out[gl_InvocationID].C_v = _18[gl_GlobalInvocationID].v;
+    gl_out[gl_InvocationID].gl_Position = _33[gl_GlobalInvocationID].gl_Position;
+    gl_out[gl_InvocationID].gl_ClipDistance[0] = _33[gl_GlobalInvocationID].gl_ClipDistance[0];
+    gl_out[gl_InvocationID].gl_CullDistance[0] = _33[gl_GlobalInvocationID].gl_CullDistance[0];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    gl_out_masked[gl_InvocationID] = _33[gl_InvocationID];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    patchOut.P_v = float4(0.0);
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].C_v = float4(1.0);
+    patchOut.P_v = float4(2.0);
+    gl_out[gl_InvocationID].gl_Position = float4(3.0);
+    gl_out_masked[gl_InvocationID].gl_PointSize = 4.0;
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc
new file mode 100644 (file)
index 0000000..ecb4234
--- /dev/null
@@ -0,0 +1,103 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct C
+{
+    float4 v;
+};
+
+struct P
+{
+    float4 v;
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+constant spvUnsafeArray<float, 1> _51 = spvUnsafeArray<float, 1>({ 0.0 });
+constant spvUnsafeArray<float, 1> _52 = spvUnsafeArray<float, 1>({ 0.0 });
+
+struct main0_out
+{
+    float4 C_v;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_patchOut
+{
+    float4 P_v;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    spvUnsafeArray<C, 4> _18 = spvUnsafeArray<C, 4>({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } });
+    spvUnsafeArray<gl_PerVertex, 4> _33 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
+    
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    gl_out[gl_InvocationID].C_v = _18[gl_GlobalInvocationID].v;
+    gl_out[gl_InvocationID].gl_PointSize = _33[gl_GlobalInvocationID].gl_PointSize;
+    gl_out[gl_InvocationID].gl_ClipDistance[0] = _33[gl_GlobalInvocationID].gl_ClipDistance[0];
+    gl_out[gl_InvocationID].gl_CullDistance[0] = _33[gl_GlobalInvocationID].gl_CullDistance[0];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    gl_out_masked[gl_InvocationID] = _33[gl_InvocationID];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    patchOut.P_v = float4(0.0);
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].C_v = float4(1.0);
+    patchOut.P_v = float4(2.0);
+    gl_out_masked[gl_InvocationID].gl_Position = float4(3.0);
+    gl_out[gl_InvocationID].gl_PointSize = 4.0;
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc
new file mode 100644 (file)
index 0000000..fec7e27
--- /dev/null
@@ -0,0 +1,93 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex
+{
+    float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position;
+    float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize;
+    float _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance[1];
+    float _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance[1];
+};
+
+constant spvUnsafeArray<float4, 4> _15 = spvUnsafeArray<float4, 4>({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) });
+constant spvUnsafeArray<float, 1> _45 = spvUnsafeArray<float, 1>({ 0.0 });
+constant spvUnsafeArray<float, 1> _46 = spvUnsafeArray<float, 1>({ 0.0 });
+
+struct main0_out
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_patchOut
+{
+    float4 foo_patch;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
+    
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup float4 spvStoragefoo[8][4];
+    threadgroup float4 (&foo)[4] = spvStoragefoo[(gl_GlobalInvocationID.x / 4) % 8];
+    foo[gl_InvocationID] = _15[gl_InvocationID];
+    gl_out[gl_InvocationID].gl_Position = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position;
+    gl_out[gl_InvocationID].gl_PointSize = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize;
+    gl_out[gl_InvocationID].gl_ClipDistance[0] = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance[0];
+    gl_out[gl_InvocationID].gl_CullDistance[0] = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance[0];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    patchOut.foo_patch = float4(0.0);
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    foo[gl_InvocationID] = float4(1.0);
+    patchOut.foo_patch = float4(2.0);
+    gl_out[gl_InvocationID].gl_Position = float4(3.0);
+    gl_out[gl_InvocationID].gl_PointSize = 4.0;
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc
new file mode 100644 (file)
index 0000000..37f4e2b
--- /dev/null
@@ -0,0 +1,92 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex
+{
+    float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position;
+    float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize;
+    float _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance[1];
+    float _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance[1];
+};
+
+constant spvUnsafeArray<float4, 4> _15 = spvUnsafeArray<float4, 4>({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) });
+constant spvUnsafeArray<float, 1> _45 = spvUnsafeArray<float, 1>({ 0.0 });
+constant spvUnsafeArray<float, 1> _46 = spvUnsafeArray<float, 1>({ 0.0 });
+
+struct main0_out
+{
+    float4 foo;
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_patchOut
+{
+};
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
+    
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    gl_out[gl_InvocationID].foo = _15[gl_InvocationID];
+    gl_out[gl_InvocationID].gl_Position = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position;
+    gl_out[gl_InvocationID].gl_PointSize = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize;
+    gl_out[gl_InvocationID].gl_ClipDistance[0] = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance[0];
+    gl_out[gl_InvocationID].gl_CullDistance[0] = _29[gl_GlobalInvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance[0];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    threadgroup float4 spvStoragefoo_patch[8];
+    threadgroup float4 (&foo_patch) = spvStoragefoo_patch[(gl_GlobalInvocationID.x / 4) % 8];
+    foo_patch = float4(0.0);
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].foo = float4(1.0);
+    foo_patch = float4(2.0);
+    gl_out[gl_InvocationID].gl_Position = float4(3.0);
+    gl_out[gl_InvocationID].gl_PointSize = 4.0;
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc
new file mode 100644 (file)
index 0000000..e132d76
--- /dev/null
@@ -0,0 +1,93 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+constant spvUnsafeArray<float4, 4> _15 = spvUnsafeArray<float4, 4>({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) });
+constant spvUnsafeArray<float, 1> _45 = spvUnsafeArray<float, 1>({ 0.0 });
+constant spvUnsafeArray<float, 1> _46 = spvUnsafeArray<float, 1>({ 0.0 });
+
+struct main0_out
+{
+    float4 foo;
+    float4 gl_Position;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_patchOut
+{
+    float4 foo_patch;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    spvUnsafeArray<gl_PerVertex, 4> _29 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
+    
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    gl_out[gl_InvocationID].foo = _15[gl_InvocationID];
+    gl_out[gl_InvocationID].gl_Position = _29[gl_GlobalInvocationID].gl_Position;
+    gl_out[gl_InvocationID].gl_ClipDistance[0] = _29[gl_GlobalInvocationID].gl_ClipDistance[0];
+    gl_out[gl_InvocationID].gl_CullDistance[0] = _29[gl_GlobalInvocationID].gl_CullDistance[0];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    gl_out_masked[gl_InvocationID] = _29[gl_InvocationID];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    patchOut.foo_patch = float4(0.0);
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].foo = float4(1.0);
+    patchOut.foo_patch = float4(2.0);
+    gl_out[gl_InvocationID].gl_Position = float4(3.0);
+    gl_out_masked[gl_InvocationID].gl_PointSize = 4.0;
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc
new file mode 100644 (file)
index 0000000..909b012
--- /dev/null
@@ -0,0 +1,93 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+constant spvUnsafeArray<float4, 4> _15 = spvUnsafeArray<float4, 4>({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) });
+constant spvUnsafeArray<float, 1> _45 = spvUnsafeArray<float, 1>({ 0.0 });
+constant spvUnsafeArray<float, 1> _46 = spvUnsafeArray<float, 1>({ 0.0 });
+
+struct main0_out
+{
+    float4 foo;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_patchOut
+{
+    float4 foo_patch;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    spvUnsafeArray<gl_PerVertex, 4> _29 = spvUnsafeArray<gl_PerVertex, 4>({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray<float, 1>({ 0.0 }), spvUnsafeArray<float, 1>({ 0.0 }) } });
+    
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    gl_out[gl_InvocationID].foo = _15[gl_InvocationID];
+    gl_out[gl_InvocationID].gl_PointSize = _29[gl_GlobalInvocationID].gl_PointSize;
+    gl_out[gl_InvocationID].gl_ClipDistance[0] = _29[gl_GlobalInvocationID].gl_ClipDistance[0];
+    gl_out[gl_InvocationID].gl_CullDistance[0] = _29[gl_GlobalInvocationID].gl_CullDistance[0];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    gl_out_masked[gl_InvocationID] = _29[gl_InvocationID];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    patchOut.foo_patch = float4(0.0);
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].foo = float4(1.0);
+    patchOut.foo_patch = float4(2.0);
+    gl_out_masked[gl_InvocationID].gl_Position = float4(3.0);
+    gl_out[gl_InvocationID].gl_PointSize = 4.0;
+}
+
diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc
new file mode 100644 (file)
index 0000000..55f124a
--- /dev/null
@@ -0,0 +1,52 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_a;
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_b;
+};
+
+static inline __attribute__((always_inline))
+void write_in_function(threadgroup P& _11, device main0_patchOut& patchOut, device main0_out* thread & gl_out, thread uint& gl_InvocationID)
+{
+    _11.a = 1.0;
+    patchOut.P_b = 2.0;
+    gl_out[gl_InvocationID].C_a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    threadgroup P spvStorage_11[8];
+    threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    write_in_function(_11, patchOut, gl_out, gl_InvocationID);
+}
+
diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc
new file mode 100644 (file)
index 0000000..63b9554
--- /dev/null
@@ -0,0 +1,52 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_a;
+    float P_b;
+};
+
+static inline __attribute__((always_inline))
+void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID)
+{
+    patchOut.P_a = 1.0;
+    patchOut.P_b = 2.0;
+    c[gl_InvocationID].a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup C spvStoragec[8][4];
+    threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    write_in_function(patchOut, c, gl_out, gl_InvocationID);
+}
+
diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc
new file mode 100644 (file)
index 0000000..9b8401b
--- /dev/null
@@ -0,0 +1,87 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    float4 gl_Position;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> v1;
+    float4 v3;
+};
+
+static inline __attribute__((always_inline))
+void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, device main0_out* thread & gl_out)
+{
+    v0[gl_InvocationID] = float4(1.0);
+    v0[gl_InvocationID].z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        v1[0] = float4(2.0);
+        ((device float*)&v1[0])[0u] = 3.0;
+        v1[1] = float4(2.0);
+        ((device float*)&v1[1])[0u] = 5.0;
+    }
+    v3 = float4(5.0);
+    gl_out[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup float4 spvStoragev0[8][4];
+    threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    write_in_func(v0, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out);
+}
+
diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc
new file mode 100644 (file)
index 0000000..4b051bf
--- /dev/null
@@ -0,0 +1,48 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 v0;
+    float4 gl_Position;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    float4 v3;
+};
+
+static inline __attribute__((always_inline))
+void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4 (&v1)[2], device float4& v3)
+{
+    gl_out[gl_InvocationID].v0 = float4(1.0);
+    gl_out[gl_InvocationID].v0.z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        v1[0] = float4(2.0);
+        ((threadgroup float*)&v1[0])[0u] = 3.0;
+        v1[1] = float4(2.0);
+        ((threadgroup float*)&v1[1])[0u] = 5.0;
+    }
+    v3 = float4(5.0);
+    gl_out[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    threadgroup float4 spvStoragev1[8][2];
+    threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    write_in_func(gl_out, gl_InvocationID, v1, patchOut.v3);
+}
+
diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc
new file mode 100644 (file)
index 0000000..d2c5a13
--- /dev/null
@@ -0,0 +1,95 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_out
+{
+    float4 v0;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> v1;
+    float4 v3;
+};
+
+static inline __attribute__((always_inline))
+void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
+{
+    gl_out[gl_InvocationID].v0 = float4(1.0);
+    gl_out[gl_InvocationID].v0.z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        v1[0] = float4(2.0);
+        ((device float*)&v1[0])[0u] = 3.0;
+        v1[1] = float4(2.0);
+        ((device float*)&v1[1])[0u] = 5.0;
+    }
+    v3 = float4(5.0);
+    gl_out[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out_masked[gl_InvocationID].gl_PointSize = 40.0;
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked);
+}
+
diff --git a/reference/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc
new file mode 100644 (file)
index 0000000..f48d707
--- /dev/null
@@ -0,0 +1,95 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_out
+{
+    float4 v0;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> v1;
+    float4 v3;
+};
+
+static inline __attribute__((always_inline))
+void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray<float4, 2>& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4])
+{
+    gl_out[gl_InvocationID].v0 = float4(1.0);
+    gl_out[gl_InvocationID].v0.z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        v1[0] = float4(2.0);
+        ((device float*)&v1[0])[0u] = 3.0;
+        v1[1] = float4(2.0);
+        ((device float*)&v1[1])[0u] = 5.0;
+    }
+    v3 = float4(5.0);
+    gl_out_masked[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out_masked[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked);
+}
+
diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc
new file mode 100644 (file)
index 0000000..a3d4899
--- /dev/null
@@ -0,0 +1,85 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 10
+; Bound: 44
+; Schema: 0
+               OpCapability Tessellation
+               OpCapability TessellationPointSize
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out
+               OpExecutionMode %main OutputVertices 4
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %C "C"
+               OpMemberName %C 0 "v"
+               OpName %c "c"
+               OpName %gl_InvocationID "gl_InvocationID"
+               OpName %P "P"
+               OpMemberName %P 0 "v"
+               OpName %p "p"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %gl_out "gl_out"
+               OpDecorate %C Block
+               OpDecorate %c Location 0
+               OpDecorate %gl_InvocationID BuiltIn InvocationId
+               OpMemberDecorate %P 0 Patch
+               OpDecorate %P Block
+               OpDecorate %p Location 1
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %C = OpTypeStruct %v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_C_uint_4 = OpTypeArray %C %uint_4
+%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4
+               %zero_c = OpConstantNull %_arr_C_uint_4
+          %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c
+        %int = OpTypeInt 32 1
+%_ptr_Input_int = OpTypePointer Input %int
+%gl_InvocationID = OpVariable %_ptr_Input_int Input
+      %int_0 = OpConstant %int 0
+    %float_1 = OpConstant %float 1
+         %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+          %P = OpTypeStruct %v4float
+%_ptr_Output_P = OpTypePointer Output %P
+               %zero_p = OpConstantNull %P
+          %p = OpVariable %_ptr_Output_P Output %zero_p
+    %float_2 = OpConstant %float 2
+         %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4
+%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4
+     %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output
+    %float_3 = OpConstant %float 3
+         %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3
+      %int_1 = OpConstant %int 1
+    %float_4 = OpConstant %float 4
+%_ptr_Output_float = OpTypePointer Output %float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %17 = OpLoad %int %gl_InvocationID
+         %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0
+               OpStore %22 %20
+         %28 = OpAccessChain %_ptr_Output_v4float %p %int_0
+               OpStore %28 %27
+         %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0
+               OpStore %38 %37
+         %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1
+               OpStore %43 %float_4
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc
new file mode 100644 (file)
index 0000000..a3d4899
--- /dev/null
@@ -0,0 +1,85 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 10
+; Bound: 44
+; Schema: 0
+               OpCapability Tessellation
+               OpCapability TessellationPointSize
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out
+               OpExecutionMode %main OutputVertices 4
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %C "C"
+               OpMemberName %C 0 "v"
+               OpName %c "c"
+               OpName %gl_InvocationID "gl_InvocationID"
+               OpName %P "P"
+               OpMemberName %P 0 "v"
+               OpName %p "p"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %gl_out "gl_out"
+               OpDecorate %C Block
+               OpDecorate %c Location 0
+               OpDecorate %gl_InvocationID BuiltIn InvocationId
+               OpMemberDecorate %P 0 Patch
+               OpDecorate %P Block
+               OpDecorate %p Location 1
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %C = OpTypeStruct %v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_C_uint_4 = OpTypeArray %C %uint_4
+%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4
+               %zero_c = OpConstantNull %_arr_C_uint_4
+          %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c
+        %int = OpTypeInt 32 1
+%_ptr_Input_int = OpTypePointer Input %int
+%gl_InvocationID = OpVariable %_ptr_Input_int Input
+      %int_0 = OpConstant %int 0
+    %float_1 = OpConstant %float 1
+         %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+          %P = OpTypeStruct %v4float
+%_ptr_Output_P = OpTypePointer Output %P
+               %zero_p = OpConstantNull %P
+          %p = OpVariable %_ptr_Output_P Output %zero_p
+    %float_2 = OpConstant %float 2
+         %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4
+%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4
+     %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output
+    %float_3 = OpConstant %float 3
+         %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3
+      %int_1 = OpConstant %int 1
+    %float_4 = OpConstant %float 4
+%_ptr_Output_float = OpTypePointer Output %float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %17 = OpLoad %int %gl_InvocationID
+         %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0
+               OpStore %22 %20
+         %28 = OpAccessChain %_ptr_Output_v4float %p %int_0
+               OpStore %28 %27
+         %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0
+               OpStore %38 %37
+         %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1
+               OpStore %43 %float_4
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc
new file mode 100644 (file)
index 0000000..23424ff
--- /dev/null
@@ -0,0 +1,86 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 10
+; Bound: 44
+; Schema: 0
+               OpCapability Tessellation
+               OpCapability TessellationPointSize
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out
+               OpExecutionMode %main OutputVertices 4
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %C "C"
+               OpMemberName %C 0 "v"
+               OpName %c "c"
+               OpName %gl_InvocationID "gl_InvocationID"
+               OpName %P "P"
+               OpMemberName %P 0 "v"
+               OpName %p "p"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %gl_out "gl_out"
+               OpDecorate %C Block
+               OpDecorate %c Location 0
+               OpDecorate %gl_InvocationID BuiltIn InvocationId
+               OpMemberDecorate %P 0 Patch
+               OpDecorate %P Block
+               OpDecorate %p Location 1
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %C = OpTypeStruct %v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_C_uint_4 = OpTypeArray %C %uint_4
+%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4
+               %zero_c = OpConstantNull %_arr_C_uint_4
+          %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c
+        %int = OpTypeInt 32 1
+%_ptr_Input_int = OpTypePointer Input %int
+%gl_InvocationID = OpVariable %_ptr_Input_int Input
+      %int_0 = OpConstant %int 0
+    %float_1 = OpConstant %float 1
+         %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+          %P = OpTypeStruct %v4float
+%_ptr_Output_P = OpTypePointer Output %P
+               %zero_p = OpConstantNull %P
+          %p = OpVariable %_ptr_Output_P Output %zero_p
+    %float_2 = OpConstant %float 2
+         %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4
+%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4
+       %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4
+     %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out
+    %float_3 = OpConstant %float 3
+         %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3
+      %int_1 = OpConstant %int 1
+    %float_4 = OpConstant %float 4
+%_ptr_Output_float = OpTypePointer Output %float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %17 = OpLoad %int %gl_InvocationID
+         %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0
+               OpStore %22 %20
+         %28 = OpAccessChain %_ptr_Output_v4float %p %int_0
+               OpStore %28 %27
+         %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0
+               OpStore %38 %37
+         %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1
+               OpStore %43 %float_4
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc
new file mode 100644 (file)
index 0000000..23424ff
--- /dev/null
@@ -0,0 +1,86 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 10
+; Bound: 44
+; Schema: 0
+               OpCapability Tessellation
+               OpCapability TessellationPointSize
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out
+               OpExecutionMode %main OutputVertices 4
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %C "C"
+               OpMemberName %C 0 "v"
+               OpName %c "c"
+               OpName %gl_InvocationID "gl_InvocationID"
+               OpName %P "P"
+               OpMemberName %P 0 "v"
+               OpName %p "p"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %gl_out "gl_out"
+               OpDecorate %C Block
+               OpDecorate %c Location 0
+               OpDecorate %gl_InvocationID BuiltIn InvocationId
+               OpMemberDecorate %P 0 Patch
+               OpDecorate %P Block
+               OpDecorate %p Location 1
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %C = OpTypeStruct %v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_C_uint_4 = OpTypeArray %C %uint_4
+%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4
+               %zero_c = OpConstantNull %_arr_C_uint_4
+          %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c
+        %int = OpTypeInt 32 1
+%_ptr_Input_int = OpTypePointer Input %int
+%gl_InvocationID = OpVariable %_ptr_Input_int Input
+      %int_0 = OpConstant %int 0
+    %float_1 = OpConstant %float 1
+         %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+          %P = OpTypeStruct %v4float
+%_ptr_Output_P = OpTypePointer Output %P
+               %zero_p = OpConstantNull %P
+          %p = OpVariable %_ptr_Output_P Output %zero_p
+    %float_2 = OpConstant %float 2
+         %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4
+%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4
+       %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4
+     %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out
+    %float_3 = OpConstant %float 3
+         %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3
+      %int_1 = OpConstant %int 1
+    %float_4 = OpConstant %float 4
+%_ptr_Output_float = OpTypePointer Output %float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %17 = OpLoad %int %gl_InvocationID
+         %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0
+               OpStore %22 %20
+         %28 = OpAccessChain %_ptr_Output_v4float %p %int_0
+               OpStore %28 %27
+         %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0
+               OpStore %38 %37
+         %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1
+               OpStore %43 %float_4
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc
new file mode 100644 (file)
index 0000000..6b616b0
--- /dev/null
@@ -0,0 +1,76 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 10
+; Bound: 40
+; Schema: 0
+               OpCapability Tessellation
+               OpCapability TessellationPointSize
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out
+               OpExecutionMode %main OutputVertices 4
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %foo "foo"
+               OpName %gl_InvocationID "gl_InvocationID"
+               OpName %foo_patch "foo_patch"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %gl_out "gl_out"
+               OpDecorate %foo Location 0
+               OpDecorate %gl_InvocationID BuiltIn InvocationId
+               OpDecorate %foo_patch Patch
+               OpDecorate %foo_patch Location 1
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4
+       %zero_foo = OpConstantNull %_arr_v4float_uint_4
+%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4
+        %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo
+        %int = OpTypeInt 32 1
+%_ptr_Input_int = OpTypePointer Input %int
+%gl_InvocationID = OpVariable %_ptr_Input_int Input
+    %float_1 = OpConstant %float 1
+         %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+       %zero_foo_patch = OpConstantNull %v4float
+  %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch
+    %float_2 = OpConstant %float 2
+         %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4
+%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4
+       %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4
+     %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out
+      %int_0 = OpConstant %int 0
+    %float_3 = OpConstant %float 3
+         %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3
+      %int_1 = OpConstant %int 1
+    %float_4 = OpConstant %float 4
+%_ptr_Output_float = OpTypePointer Output %float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %16 = OpLoad %int %gl_InvocationID
+         %20 = OpAccessChain %_ptr_Output_v4float %foo %16
+               OpStore %20 %18
+               OpStore %foo_patch %23
+         %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0
+               OpStore %34 %33
+         %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1
+               OpStore %39 %float_4
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc
new file mode 100644 (file)
index 0000000..6b616b0
--- /dev/null
@@ -0,0 +1,76 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 10
+; Bound: 40
+; Schema: 0
+               OpCapability Tessellation
+               OpCapability TessellationPointSize
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out
+               OpExecutionMode %main OutputVertices 4
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %foo "foo"
+               OpName %gl_InvocationID "gl_InvocationID"
+               OpName %foo_patch "foo_patch"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %gl_out "gl_out"
+               OpDecorate %foo Location 0
+               OpDecorate %gl_InvocationID BuiltIn InvocationId
+               OpDecorate %foo_patch Patch
+               OpDecorate %foo_patch Location 1
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4
+       %zero_foo = OpConstantNull %_arr_v4float_uint_4
+%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4
+        %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo
+        %int = OpTypeInt 32 1
+%_ptr_Input_int = OpTypePointer Input %int
+%gl_InvocationID = OpVariable %_ptr_Input_int Input
+    %float_1 = OpConstant %float 1
+         %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+       %zero_foo_patch = OpConstantNull %v4float
+  %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch
+    %float_2 = OpConstant %float 2
+         %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4
+%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4
+       %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4
+     %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out
+      %int_0 = OpConstant %int 0
+    %float_3 = OpConstant %float 3
+         %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3
+      %int_1 = OpConstant %int 1
+    %float_4 = OpConstant %float 4
+%_ptr_Output_float = OpTypePointer Output %float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %16 = OpLoad %int %gl_InvocationID
+         %20 = OpAccessChain %_ptr_Output_v4float %foo %16
+               OpStore %20 %18
+               OpStore %foo_patch %23
+         %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0
+               OpStore %34 %33
+         %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1
+               OpStore %39 %float_4
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc
new file mode 100644 (file)
index 0000000..6b616b0
--- /dev/null
@@ -0,0 +1,76 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 10
+; Bound: 40
+; Schema: 0
+               OpCapability Tessellation
+               OpCapability TessellationPointSize
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out
+               OpExecutionMode %main OutputVertices 4
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %foo "foo"
+               OpName %gl_InvocationID "gl_InvocationID"
+               OpName %foo_patch "foo_patch"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %gl_out "gl_out"
+               OpDecorate %foo Location 0
+               OpDecorate %gl_InvocationID BuiltIn InvocationId
+               OpDecorate %foo_patch Patch
+               OpDecorate %foo_patch Location 1
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4
+       %zero_foo = OpConstantNull %_arr_v4float_uint_4
+%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4
+        %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo
+        %int = OpTypeInt 32 1
+%_ptr_Input_int = OpTypePointer Input %int
+%gl_InvocationID = OpVariable %_ptr_Input_int Input
+    %float_1 = OpConstant %float 1
+         %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+       %zero_foo_patch = OpConstantNull %v4float
+  %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch
+    %float_2 = OpConstant %float 2
+         %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4
+%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4
+       %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4
+     %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out
+      %int_0 = OpConstant %int 0
+    %float_3 = OpConstant %float 3
+         %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3
+      %int_1 = OpConstant %int 1
+    %float_4 = OpConstant %float 4
+%_ptr_Output_float = OpTypePointer Output %float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %16 = OpLoad %int %gl_InvocationID
+         %20 = OpAccessChain %_ptr_Output_v4float %foo %16
+               OpStore %20 %18
+               OpStore %foo_patch %23
+         %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0
+               OpStore %34 %33
+         %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1
+               OpStore %39 %float_4
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc
new file mode 100644 (file)
index 0000000..6b616b0
--- /dev/null
@@ -0,0 +1,76 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 10
+; Bound: 40
+; Schema: 0
+               OpCapability Tessellation
+               OpCapability TessellationPointSize
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out
+               OpExecutionMode %main OutputVertices 4
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %foo "foo"
+               OpName %gl_InvocationID "gl_InvocationID"
+               OpName %foo_patch "foo_patch"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %gl_out "gl_out"
+               OpDecorate %foo Location 0
+               OpDecorate %gl_InvocationID BuiltIn InvocationId
+               OpDecorate %foo_patch Patch
+               OpDecorate %foo_patch Location 1
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4
+       %zero_foo = OpConstantNull %_arr_v4float_uint_4
+%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4
+        %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo
+        %int = OpTypeInt 32 1
+%_ptr_Input_int = OpTypePointer Input %int
+%gl_InvocationID = OpVariable %_ptr_Input_int Input
+    %float_1 = OpConstant %float 1
+         %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+       %zero_foo_patch = OpConstantNull %v4float
+  %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch
+    %float_2 = OpConstant %float 2
+         %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4
+%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4
+       %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4
+     %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out
+      %int_0 = OpConstant %int 0
+    %float_3 = OpConstant %float 3
+         %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3
+      %int_1 = OpConstant %int 1
+    %float_4 = OpConstant %float 4
+%_ptr_Output_float = OpTypePointer Output %float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %16 = OpLoad %int %gl_InvocationID
+         %20 = OpAccessChain %_ptr_Output_v4float %foo %16
+               OpStore %20 %18
+               OpStore %foo_patch %23
+         %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0
+               OpStore %34 %33
+         %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1
+               OpStore %39 %float_4
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc
new file mode 100644 (file)
index 0000000..955f2c4
--- /dev/null
@@ -0,0 +1,28 @@
+#version 450
+
+layout(vertices = 4) out;
+patch out P
+{
+       layout(location = 0) float a;
+       layout(location = 2) float b;
+};
+
+out C
+{
+       layout(location = 1) float a;
+       layout(location = 3) float b;
+} c[];
+
+void write_in_function()
+{
+       a = 1.0;
+       b = 2.0;
+       c[gl_InvocationID].a = 3.0;
+       c[gl_InvocationID].b = 4.0;
+       gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+}
+
+void main()
+{
+       write_in_function();
+}
diff --git a/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc
new file mode 100644 (file)
index 0000000..955f2c4
--- /dev/null
@@ -0,0 +1,28 @@
+#version 450
+
+layout(vertices = 4) out;
+patch out P
+{
+       layout(location = 0) float a;
+       layout(location = 2) float b;
+};
+
+out C
+{
+       layout(location = 1) float a;
+       layout(location = 3) float b;
+} c[];
+
+void write_in_function()
+{
+       a = 1.0;
+       b = 2.0;
+       c[gl_InvocationID].a = 3.0;
+       c[gl_InvocationID].b = 4.0;
+       gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+}
+
+void main()
+{
+       write_in_function();
+}
diff --git a/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc
new file mode 100644 (file)
index 0000000..9f3ca9f
--- /dev/null
@@ -0,0 +1,29 @@
+#version 450
+
+layout(vertices = 4) out;
+
+layout(location = 0) out vec4 v0[];
+layout(location = 1) patch out vec4 v1[2];
+layout(location = 3) patch out vec4 v3;
+
+void write_in_func()
+{
+       v0[gl_InvocationID] = vec4(1.0);
+       v0[gl_InvocationID].z = 3.0;
+       if (gl_InvocationID == 0)
+       {
+               v1[0] = vec4(2.0);
+               v1[0].x = 3.0;
+               v1[1] = vec4(2.0);
+               v1[1].x = 5.0;
+       }
+       v3 = vec4(5.0);
+       gl_out[gl_InvocationID].gl_Position = vec4(10.0);
+       gl_out[gl_InvocationID].gl_Position.z = 20.0;
+       gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
+void main()
+{
+       write_in_func();
+}
diff --git a/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc
new file mode 100644 (file)
index 0000000..9f3ca9f
--- /dev/null
@@ -0,0 +1,29 @@
+#version 450
+
+layout(vertices = 4) out;
+
+layout(location = 0) out vec4 v0[];
+layout(location = 1) patch out vec4 v1[2];
+layout(location = 3) patch out vec4 v3;
+
+void write_in_func()
+{
+       v0[gl_InvocationID] = vec4(1.0);
+       v0[gl_InvocationID].z = 3.0;
+       if (gl_InvocationID == 0)
+       {
+               v1[0] = vec4(2.0);
+               v1[0].x = 3.0;
+               v1[1] = vec4(2.0);
+               v1[1].x = 5.0;
+       }
+       v3 = vec4(5.0);
+       gl_out[gl_InvocationID].gl_Position = vec4(10.0);
+       gl_out[gl_InvocationID].gl_Position.z = 20.0;
+       gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
+void main()
+{
+       write_in_func();
+}
diff --git a/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc
new file mode 100644 (file)
index 0000000..9f3ca9f
--- /dev/null
@@ -0,0 +1,29 @@
+#version 450
+
+layout(vertices = 4) out;
+
+layout(location = 0) out vec4 v0[];
+layout(location = 1) patch out vec4 v1[2];
+layout(location = 3) patch out vec4 v3;
+
+void write_in_func()
+{
+       v0[gl_InvocationID] = vec4(1.0);
+       v0[gl_InvocationID].z = 3.0;
+       if (gl_InvocationID == 0)
+       {
+               v1[0] = vec4(2.0);
+               v1[0].x = 3.0;
+               v1[1] = vec4(2.0);
+               v1[1].x = 5.0;
+       }
+       v3 = vec4(5.0);
+       gl_out[gl_InvocationID].gl_Position = vec4(10.0);
+       gl_out[gl_InvocationID].gl_Position.z = 20.0;
+       gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
+void main()
+{
+       write_in_func();
+}
diff --git a/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc
new file mode 100644 (file)
index 0000000..9f3ca9f
--- /dev/null
@@ -0,0 +1,29 @@
+#version 450
+
+layout(vertices = 4) out;
+
+layout(location = 0) out vec4 v0[];
+layout(location = 1) patch out vec4 v1[2];
+layout(location = 3) patch out vec4 v3;
+
+void write_in_func()
+{
+       v0[gl_InvocationID] = vec4(1.0);
+       v0[gl_InvocationID].z = 3.0;
+       if (gl_InvocationID == 0)
+       {
+               v1[0] = vec4(2.0);
+               v1[0].x = 3.0;
+               v1[1] = vec4(2.0);
+               v1[1].x = 5.0;
+       }
+       v3 = vec4(5.0);
+       gl_out[gl_InvocationID].gl_Position = vec4(10.0);
+       gl_out[gl_InvocationID].gl_Position.z = 20.0;
+       gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
+void main()
+{
+       write_in_func();
+}
index b34dab4..dc7a51f 100644 (file)
@@ -2958,6 +2958,77 @@ bool CompilerMSL::variable_storage_requires_stage_io(spv::StorageClass storage)
                return false;
 }
 
+void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array)
+{
+       auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
+       bool threadgroup_storage = variable_decl_is_remapped_storage(masked_var, StorageClassWorkgroup);
+
+       if (threadgroup_storage && msl_options.multi_patch_workgroup)
+       {
+               // We need one threadgroup block per patch, so fake this.
+               entry_func.fixup_hooks_in.push_back([this, &masked_var]() {
+                       auto &type = get_variable_data_type(masked_var);
+                       add_local_variable_name(masked_var.self);
+
+                       bool old_is_builtin = is_using_builtin_array;
+                       is_using_builtin_array = true;
+
+                       const uint32_t max_control_points_per_patch = 32u;
+                       uint32_t max_num_instances =
+                                       (max_control_points_per_patch + get_entry_point().output_vertices - 1u) /
+                                       get_entry_point().output_vertices;
+                       statement("threadgroup ", type_to_glsl(type), " ",
+                                 "spvStorage", to_name(masked_var.self), "[", max_num_instances, "]",
+                                 type_to_array_glsl(type), ";");
+
+                       // Assign a threadgroup slice to each PrimitiveID.
+                       // We assume here that workgroup size is rounded to 32,
+                       // since that's the maximum number of control points per patch.
+                       // We cannot size the array based on fixed dispatch parameters,
+                       // since Metal does not allow that. :(
+                       // FIXME: We will likely need an option to support passing down target workgroup size,
+                       // so we can emit appropriate size here.
+                       statement("threadgroup ", type_to_glsl(type), " ",
+                                 "(&", to_name(masked_var.self), ")",
+                                 type_to_array_glsl(type), " = spvStorage", to_name(masked_var.self), "[",
+                                 "(", to_expression(builtin_invocation_id_id), ".x / ",
+                                 get_entry_point().output_vertices, ") % ",
+                                 max_num_instances, "];");
+
+                       is_using_builtin_array = old_is_builtin;
+               });
+       }
+       else
+       {
+               entry_func.add_local_variable(masked_var.self);
+       }
+
+       if (!threadgroup_storage)
+       {
+               vars_needing_early_declaration.push_back(masked_var.self);
+       }
+       else if (masked_var.initializer)
+       {
+               // Cannot directly initialize threadgroup variables. Need fixup hooks.
+               ID initializer = masked_var.initializer;
+               if (strip_array)
+               {
+                       entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() {
+                               statement(to_expression(masked_var.self), "[",
+                                         builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "] = ",
+                                         to_expression(initializer), "[",
+                                         builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
+                       });
+               }
+               else
+               {
+                       entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() {
+                               statement(to_expression(masked_var.self), " = ", to_expression(initializer), ";");
+                       });
+               }
+       }
+}
+
 void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type,
                                                   SPIRVariable &var, InterfaceBlockMeta &meta)
 {
@@ -2970,34 +3041,6 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
        auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
        bool is_block = has_decoration(var_type.self, DecorationBlock);
 
-       const auto emit_local_masked_variable = [this, &entry_func, meta](SPIRVariable &masked_var) {
-               entry_func.add_local_variable(masked_var.self);
-               if (!variable_decl_is_remapped_storage(masked_var, StorageClassWorkgroup))
-               {
-                       vars_needing_early_declaration.push_back(masked_var.self);
-               }
-               else if (masked_var.initializer)
-               {
-                       // Cannot directly initialize threadgroup variables. Need fixup hooks.
-                       ID initializer = masked_var.initializer;
-                       if (meta.strip_array)
-                       {
-                               entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() {
-                                       statement(to_expression(masked_var.self), "[",
-                                                 builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "] = ",
-                                                 to_expression(initializer), "[",
-                                                 builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
-                               });
-                       }
-                       else
-                       {
-                               entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() {
-                                       statement(to_expression(masked_var.self), " = ", to_expression(initializer), ";");
-                               });
-                       }
-               }
-       };
-
        // If stage variables are masked out, emit them as plain variables instead.
        // For builtins, we query them one by one later.
        // IO blocks are not masked here, we need to mask them per-member instead.
@@ -3005,7 +3048,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
        {
                // If we ignore an output, we must still emit it, since it might be used by app.
                // Instead, just emit it as early declaration.
-               emit_local_masked_variable(var);
+               emit_local_masked_variable(var, meta.strip_array);
                return;
        }
 
@@ -3022,7 +3065,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
                        // we unflatten I/O blocks while running the shader,
                        // and pass the actual struct type down to leaf functions.
                        // We then unflatten inputs, and flatten outputs in the "fixup" stages.
-                       emit_local_masked_variable(var);
+                       emit_local_masked_variable(var, meta.strip_array);
                }
 
                if (!block_requires_flattening)
@@ -3121,7 +3164,7 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
                                        set_name(var.self, "gl_out_masked");
                                        stage_out_masked_builtin_type_id = var_type.self;
                                }
-                               emit_local_masked_variable(var);
+                               emit_local_masked_variable(var, meta.strip_array);
                        }
                }
        }
index 7b1e6fc..3cc93d5 100644 (file)
@@ -806,6 +806,7 @@ protected:
                bool allow_local_declaration = false;
        };
 
+       void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array);
        void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type,
                                             SPIRVariable &var, InterfaceBlockMeta &meta);
        void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,