MSL: Fix copy of arrays to/from stage IO variables.
authorHans-Kristian Arntzen <post@arntzen-software.no>
Mon, 19 Apr 2021 09:46:30 +0000 (11:46 +0200)
committerHans-Kristian Arntzen <post@arntzen-software.no>
Mon, 19 Apr 2021 10:10:49 +0000 (12:10 +0200)
Need to take into account effective storage classes and whether or not
we target stage IO blocks since native arrays are conditionally enabled.

18 files changed:
reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc [new file with mode: 0644]
reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc [new file with mode: 0644]
reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc [new file with mode: 0644]
reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc [new file with mode: 0644]
reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc [new file with mode: 0644]
reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc [new file with mode: 0644]
reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc [new file with mode: 0644]
reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc [new file with mode: 0644]
shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc [new file with mode: 0644]
shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc [new file with mode: 0644]
shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc [new file with mode: 0644]
shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc [new file with mode: 0644]
spirv_cross.cpp
spirv_cross.hpp
spirv_glsl.cpp
spirv_glsl.hpp
spirv_msl.cpp
spirv_msl.hpp

diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc
new file mode 100644 (file)
index 0000000..4f9134e
--- /dev/null
@@ -0,0 +1,188 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+    spvUnsafeArray<float4, 2> iFoo;
+    float4 ipFoo;
+};
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup float4 spvStorageFoo[8][4][2];
+    threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+    spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
+    if (gl_InvocationID == 0)
+    {
+        spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+        patchOut.pFoo = _56;
+    }
+}
+
diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc
new file mode 100644 (file)
index 0000000..e9dd68d
--- /dev/null
@@ -0,0 +1,191 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+    float4 iFoo_0 [[attribute(0)]];
+    float4 iFoo_1 [[attribute(1)]];
+    float4 ipFoo [[attribute(2)]];
+};
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    threadgroup float4 Foo[4][2];
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    if (gl_InvocationID >= 4)
+        return;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+    spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
+    spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
+    if (gl_InvocationID == 0)
+    {
+        spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+        patchOut.pFoo = _56;
+    }
+}
+
diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc
new file mode 100644 (file)
index 0000000..a2ad010
--- /dev/null
@@ -0,0 +1,79 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    spvUnsafeArray<float4, 2> Foo;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+    spvUnsafeArray<float4, 2> iFoo;
+    float4 ipFoo;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+    gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo;
+    if (gl_InvocationID == 0)
+    {
+        spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+        patchOut.pFoo = _56;
+    }
+}
+
diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc
new file mode 100644 (file)
index 0000000..3da1d18
--- /dev/null
@@ -0,0 +1,83 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    spvUnsafeArray<float4, 2> Foo;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+    float4 iFoo_0 [[attribute(0)]];
+    float4 iFoo_1 [[attribute(1)]];
+    float4 ipFoo [[attribute(2)]];
+};
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    if (gl_InvocationID >= 4)
+        return;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+    spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
+    gl_out[gl_InvocationID].Foo = _38;
+    if (gl_InvocationID == 0)
+    {
+        spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+        patchOut.pFoo = _56;
+    }
+}
+
diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc
new file mode 100644 (file)
index 0000000..4f9134e
--- /dev/null
@@ -0,0 +1,188 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+    spvUnsafeArray<float4, 2> iFoo;
+    float4 ipFoo;
+};
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup float4 spvStorageFoo[8][4][2];
+    threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+    spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
+    if (gl_InvocationID == 0)
+    {
+        spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+        patchOut.pFoo = _56;
+    }
+}
+
diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc
new file mode 100644 (file)
index 0000000..e9dd68d
--- /dev/null
@@ -0,0 +1,191 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+    float4 iFoo_0 [[attribute(0)]];
+    float4 iFoo_1 [[attribute(1)]];
+    float4 ipFoo [[attribute(2)]];
+};
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
+{
+    for (uint i = 0; i < A; i++)
+    {
+        dst[i] = src[i];
+    }
+}
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    threadgroup float4 Foo[4][2];
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    if (gl_InvocationID >= 4)
+        return;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+    spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
+    spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
+    if (gl_InvocationID == 0)
+    {
+        spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+        patchOut.pFoo = _56;
+    }
+}
+
diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc
new file mode 100644 (file)
index 0000000..a2ad010
--- /dev/null
@@ -0,0 +1,79 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    spvUnsafeArray<float4, 2> Foo;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+    spvUnsafeArray<float4, 2> iFoo;
+    float4 ipFoo;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+    gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo;
+    if (gl_InvocationID == 0)
+    {
+        spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+        patchOut.pFoo = _56;
+    }
+}
+
diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc
new file mode 100644 (file)
index 0000000..3da1d18
--- /dev/null
@@ -0,0 +1,83 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    spvUnsafeArray<float4, 2> Foo;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+    float4 iFoo_0 [[attribute(0)]];
+    float4 iFoo_1 [[attribute(1)]];
+    float4 ipFoo [[attribute(2)]];
+};
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+    device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+    if (gl_InvocationID < spvIndirectParams[0])
+        gl_in[gl_InvocationID] = in;
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    if (gl_InvocationID >= 4)
+        return;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+    spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
+    gl_out[gl_InvocationID].Foo = _38;
+    if (gl_InvocationID == 0)
+    {
+        spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+        patchOut.pFoo = _56;
+    }
+}
+
diff --git a/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc
new file mode 100644 (file)
index 0000000..e69a7a1
--- /dev/null
@@ -0,0 +1,17 @@
+#version 450
+
+layout(vertices = 4) out;
+layout(location = 0) out vec4 Foo[][2];
+layout(location = 0) in vec4 iFoo[][2];
+layout(location = 2) patch out vec4 pFoo[2];
+layout(location = 2) in vec4 ipFoo[];
+
+void main()
+{
+       gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+       Foo[gl_InvocationID] = iFoo[gl_InvocationID];
+       if (gl_InvocationID == 0)
+       {
+               pFoo = vec4[](ipFoo[0], ipFoo[1]);
+       }
+}
diff --git a/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc
new file mode 100644 (file)
index 0000000..e69a7a1
--- /dev/null
@@ -0,0 +1,17 @@
+#version 450
+
+layout(vertices = 4) out;
+layout(location = 0) out vec4 Foo[][2];
+layout(location = 0) in vec4 iFoo[][2];
+layout(location = 2) patch out vec4 pFoo[2];
+layout(location = 2) in vec4 ipFoo[];
+
+void main()
+{
+       gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+       Foo[gl_InvocationID] = iFoo[gl_InvocationID];
+       if (gl_InvocationID == 0)
+       {
+               pFoo = vec4[](ipFoo[0], ipFoo[1]);
+       }
+}
diff --git a/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc
new file mode 100644 (file)
index 0000000..e69a7a1
--- /dev/null
@@ -0,0 +1,17 @@
+#version 450
+
+layout(vertices = 4) out;
+layout(location = 0) out vec4 Foo[][2];
+layout(location = 0) in vec4 iFoo[][2];
+layout(location = 2) patch out vec4 pFoo[2];
+layout(location = 2) in vec4 ipFoo[];
+
+void main()
+{
+       gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+       Foo[gl_InvocationID] = iFoo[gl_InvocationID];
+       if (gl_InvocationID == 0)
+       {
+               pFoo = vec4[](ipFoo[0], ipFoo[1]);
+       }
+}
diff --git a/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc
new file mode 100644 (file)
index 0000000..e69a7a1
--- /dev/null
@@ -0,0 +1,17 @@
+#version 450
+
+layout(vertices = 4) out;
+layout(location = 0) out vec4 Foo[][2];
+layout(location = 0) in vec4 iFoo[][2];
+layout(location = 2) patch out vec4 pFoo[2];
+layout(location = 2) in vec4 ipFoo[];
+
+void main()
+{
+       gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+       Foo[gl_InvocationID] = iFoo[gl_InvocationID];
+       if (gl_InvocationID == 0)
+       {
+               pFoo = vec4[](ipFoo[0], ipFoo[1]);
+       }
+}
index afd8c50..5bba905 100644 (file)
@@ -284,31 +284,6 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
        return var;
 }
 
-StorageClass Compiler::get_expression_effective_storage_class(uint32_t ptr)
-{
-       auto *var = maybe_get_backing_variable(ptr);
-
-       // If the expression has been lowered to a temporary, we need to use the Generic storage class.
-       // We're looking for the effective storage class of a given expression.
-       // An access chain or forwarded OpLoads from such access chains
-       // will generally have the storage class of the underlying variable, but if the load was not forwarded
-       // we have lost any address space qualifiers.
-       bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
-                               (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
-
-       if (var && !forced_temporary)
-       {
-               // Normalize SSBOs to StorageBuffer here.
-               if (var->storage == StorageClassUniform &&
-                   has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
-                       return StorageClassStorageBuffer;
-               else
-                       return var->storage;
-       }
-       else
-               return expression_type(ptr).storage;
-}
-
 void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
 {
        auto &e = get<SPIRExpression>(expr);
index c603f84..764c6c4 100644 (file)
@@ -671,7 +671,6 @@ protected:
        bool expression_is_lvalue(uint32_t id) const;
        bool variable_storage_is_aliased(const SPIRVariable &var);
        SPIRVariable *maybe_get_backing_variable(uint32_t chain);
-       spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
 
        void register_read(uint32_t expr, uint32_t chain, bool forwarded);
        void register_write(uint32_t chain);
index d987f78..5acbf84 100644 (file)
@@ -8661,12 +8661,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
                        bool ignore_potential_sliced_writes = false;
                        if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
                        {
-                               auto *var = maybe_get_backing_variable(base);
-                               if (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
-                                       effective_storage = StorageClassWorkgroup;
-                               else if (var && variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
-                                       effective_storage = StorageClassStorageBuffer;
-                               else if (expression_type(base).pointer)
+                               if (expression_type(base).pointer)
                                        effective_storage = get_expression_effective_storage_class(base);
 
                                // Special consideration for control points.
@@ -8674,6 +8669,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
                                // to consider scalar access chains here.
                                // Cleans up some cases where it's very painful to determine the accurate storage class
                                // since blocks can be partially masked ...
+                               auto *var = maybe_get_backing_variable(base);
                                if (var && var->storage == StorageClassOutput &&
                                    get_execution_model() == ExecutionModelTessellationControl &&
                                    !has_decoration(var->self, DecorationPatch))
@@ -9911,7 +9907,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
                        // it is an array, and our backend does not support arrays as value types.
                        // Emit the temporary, and copy it explicitly.
                        e = &emit_uninitialized_temporary_expression(result_type, id);
-                       emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
+                       emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
                }
                else
                        e = &emit_op(result_type, id, expr, forward, !usage_tracking);
@@ -14794,7 +14790,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
                                // The backend is responsible for setting this up, and redirection the return values as appropriate.
                                if (ir.ids[block.return_value].get_type() != TypeUndef)
                                {
-                                       emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction,
+                                       emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
                                                        get_expression_effective_storage_class(block.return_value));
                                }
 
@@ -15013,7 +15009,7 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
                            MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
 }
 
-void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
+void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass)
 {
        statement(lhs, " = ", to_expression(rhs_id), ";");
 }
@@ -15923,3 +15919,33 @@ uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var,
 
        return location;
 }
+
+StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
+{
+       auto *var = maybe_get_backing_variable(ptr);
+
+       // If the expression has been lowered to a temporary, we need to use the Generic storage class.
+       // We're looking for the effective storage class of a given expression.
+       // An access chain or forwarded OpLoads from such access chains
+       // will generally have the storage class of the underlying variable, but if the load was not forwarded
+       // we have lost any address space qualifiers.
+       bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
+                               (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
+
+       if (var && !forced_temporary)
+       {
+               if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
+                       return StorageClassWorkgroup;
+               if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
+                       return StorageClassStorageBuffer;
+
+               // Normalize SSBOs to StorageBuffer here.
+               if (var->storage == StorageClassUniform &&
+                   has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+                       return StorageClassStorageBuffer;
+               else
+                       return var->storage;
+       }
+       else
+               return expression_type(ptr).storage;
+}
index b6d16d3..ae01f5c 100644 (file)
@@ -673,6 +673,7 @@ protected:
        std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags,
                                          AccessChainMeta *meta);
 
+       spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
        virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base);
 
        virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
@@ -739,8 +740,8 @@ protected:
        std::string layout_for_variable(const SPIRVariable &variable);
        std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id);
        virtual bool skip_argument(uint32_t id) const;
-       virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
-                                    spv::StorageClass rhs_storage);
+       virtual void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
+                                    spv::StorageClass lhs_storage, spv::StorageClass rhs_storage);
        virtual void emit_block_hints(const SPIRBlock &block);
        virtual std::string to_initializer_expression(const SPIRVariable &var);
        virtual std::string to_zero_initialized_expression(uint32_t type_id);
index cc42686..f904319 100644 (file)
@@ -8397,20 +8397,46 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
        flush_all_active_variables();
 }
 
-void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage,
-                                  StorageClass rhs_storage)
+static bool storage_class_array_is_thread(StorageClass storage)
+{
+       switch (storage)
+       {
+       case StorageClassInput:
+       case StorageClassOutput:
+       case StorageClassGeneric:
+       case StorageClassFunction:
+       case StorageClassPrivate:
+               return true;
+
+       default:
+               return false;
+       }
+}
+
+void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t rhs_id,
+                                                                 StorageClass lhs_storage, StorageClass rhs_storage)
 {
        // Allow Metal to use the array<T> template to make arrays a value type.
        // This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback.
-       bool lhs_thread = (lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction ||
-                          lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate);
-       bool rhs_thread = (rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction ||
-                          rhs_storage == StorageClassOutput ||
-                          rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate);
+       bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage);
+       bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage);
+
+       bool lhs_is_array_template = lhs_is_thread_storage;
+       bool rhs_is_array_template = rhs_is_thread_storage;
+
+       // Special considerations for stage IO variables.
+       // If the variable is actually backed by non-user visible device storage, we use array templates for those.
+       auto *lhs_var = maybe_get_backing_variable(lhs_id);
+       if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
+               lhs_is_array_template = true;
+
+       auto *rhs_var = maybe_get_backing_variable(rhs_id);
+       if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
+               rhs_is_array_template = true;
 
        // If threadgroup storage qualifiers are *not* used:
        // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier.
-       if (lhs_thread && rhs_thread && !using_builtin_array())
+       if (lhs_is_array_template && rhs_is_array_template && !using_builtin_array())
        {
                statement(lhs, " = ", to_expression(rhs_id), ";");
        }
@@ -8452,15 +8478,15 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla
                        add_spv_func_and_recompile(SPVFuncImplArrayCopy);
 
                const char *tag = nullptr;
-               if (lhs_thread && is_constant)
+               if (lhs_is_thread_storage && is_constant)
                        tag = "FromConstantToStack";
                else if (lhs_storage == StorageClassWorkgroup && is_constant)
                        tag = "FromConstantToThreadGroup";
-               else if (lhs_thread && rhs_thread)
+               else if (lhs_is_thread_storage && rhs_is_thread_storage)
                        tag = "FromStackToStack";
-               else if (lhs_storage == StorageClassWorkgroup && rhs_thread)
+               else if (lhs_storage == StorageClassWorkgroup && rhs_is_thread_storage)
                        tag = "FromStackToThreadGroup";
-               else if (lhs_thread && rhs_storage == StorageClassWorkgroup)
+               else if (lhs_is_thread_storage && rhs_storage == StorageClassWorkgroup)
                        tag = "FromThreadGroupToStack";
                else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup)
                        tag = "FromThreadGroupToThreadGroup";
@@ -8470,19 +8496,21 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla
                        tag = "FromConstantToDevice";
                else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup)
                        tag = "FromThreadGroupToDevice";
-               else if (lhs_storage == StorageClassStorageBuffer && rhs_thread)
+               else if (lhs_storage == StorageClassStorageBuffer && rhs_is_thread_storage)
                        tag = "FromStackToDevice";
                else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer)
                        tag = "FromDeviceToThreadGroup";
-               else if (lhs_thread && rhs_storage == StorageClassStorageBuffer)
+               else if (lhs_is_thread_storage && rhs_storage == StorageClassStorageBuffer)
                        tag = "FromDeviceToStack";
                else
                        SPIRV_CROSS_THROW("Unknown storage class used for copying arrays.");
 
                // Pass internal array of spvUnsafeArray<> into wrapper functions
-               if (lhs_thread && !msl_options.force_native_arrays)
+               if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays)
+                       statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);");
+               if (lhs_is_array_template && !msl_options.force_native_arrays)
                        statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");");
-               else if (rhs_thread && !msl_options.force_native_arrays)
+               else if (rhs_is_array_template && !msl_options.force_native_arrays)
                        statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);");
                else
                        statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");");
@@ -8549,8 +8577,9 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
        if (p_v_lhs)
                flush_variable_declaration(p_v_lhs->self);
 
-       emit_array_copy(to_expression(id_lhs), id_rhs, get_expression_effective_storage_class(id_lhs),
-                       get_expression_effective_storage_class(id_rhs));
+       auto lhs_storage = get_expression_effective_storage_class(id_lhs);
+       auto rhs_storage = get_expression_effective_storage_class(id_rhs);
+       emit_array_copy(to_expression(id_lhs), id_lhs, id_rhs, lhs_storage, rhs_storage);
        register_write(id_lhs);
 
        return true;
@@ -13242,9 +13271,9 @@ bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable
                // This is fine, as there cannot be concurrent writers to that memory anyways,
                // so we just ignore that case.
 
-               return capture_output_to_buffer &&
-                      variable.storage == StorageClassOutput &&
-                      !is_stage_output_variable_masked(variable);
+               return (variable.storage == StorageClassOutput || variable.storage == StorageClassInput) &&
+                      !variable_storage_requires_stage_io(variable.storage) &&
+                      (variable.storage != StorageClassOutput || !is_stage_output_variable_masked(variable));
        }
        else
        {
index 65d596d..97800ab 100644 (file)
@@ -913,8 +913,8 @@ protected:
        void add_pragma_line(const std::string &line);
        void add_typedef_line(const std::string &line);
        void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
-       void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
-                            spv::StorageClass rhs_storage) override;
+       void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
+                            spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override;
        void build_implicit_builtins();
        uint32_t build_constant_uint_array_pointer();
        void emit_entry_point_declarations() override;