--- /dev/null
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+ T elements[Num ? Num : 1];
+
+ thread T& operator [] (size_t pos) thread
+ {
+ return elements[pos];
+ }
+ constexpr const thread T& operator [] (size_t pos) const thread
+ {
+ return elements[pos];
+ }
+
+ device T& operator [] (size_t pos) device
+ {
+ return elements[pos];
+ }
+ constexpr const device T& operator [] (size_t pos) const device
+ {
+ return elements[pos];
+ }
+
+ constexpr const constant T& operator [] (size_t pos) const constant
+ {
+ return elements[pos];
+ }
+
+ threadgroup T& operator [] (size_t pos) threadgroup
+ {
+ return elements[pos];
+ }
+ constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+ {
+ return elements[pos];
+ }
+};
+
+struct main0_out
+{
+ float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+ spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+ spvUnsafeArray<float4, 2> iFoo;
+ float4 ipFoo;
+};
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
+{
+ device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+ threadgroup float4 spvStorageFoo[8][4][2];
+ threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
+ device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+ device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
+ uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+ uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+ gl_out[gl_InvocationID].gl_Position = float4(1.0);
+ spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
+ if (gl_InvocationID == 0)
+ {
+ spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+ patchOut.pFoo = _56;
+ }
+}
+
--- /dev/null
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+ T elements[Num ? Num : 1];
+
+ thread T& operator [] (size_t pos) thread
+ {
+ return elements[pos];
+ }
+ constexpr const thread T& operator [] (size_t pos) const thread
+ {
+ return elements[pos];
+ }
+
+ device T& operator [] (size_t pos) device
+ {
+ return elements[pos];
+ }
+ constexpr const device T& operator [] (size_t pos) const device
+ {
+ return elements[pos];
+ }
+
+ constexpr const constant T& operator [] (size_t pos) const constant
+ {
+ return elements[pos];
+ }
+
+ threadgroup T& operator [] (size_t pos) threadgroup
+ {
+ return elements[pos];
+ }
+ constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+ {
+ return elements[pos];
+ }
+};
+
+struct main0_out
+{
+ float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+ spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+ float4 iFoo_0 [[attribute(0)]];
+ float4 iFoo_1 [[attribute(1)]];
+ float4 ipFoo [[attribute(2)]];
+};
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+ threadgroup float4 Foo[4][2];
+ device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+ device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+ if (gl_InvocationID < spvIndirectParams[0])
+ gl_in[gl_InvocationID] = in;
+ threadgroup_barrier(mem_flags::mem_threadgroup);
+ if (gl_InvocationID >= 4)
+ return;
+ gl_out[gl_InvocationID].gl_Position = float4(1.0);
+ spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
+ spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
+ if (gl_InvocationID == 0)
+ {
+ spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+ patchOut.pFoo = _56;
+ }
+}
+
--- /dev/null
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+ T elements[Num ? Num : 1];
+
+ thread T& operator [] (size_t pos) thread
+ {
+ return elements[pos];
+ }
+ constexpr const thread T& operator [] (size_t pos) const thread
+ {
+ return elements[pos];
+ }
+
+ device T& operator [] (size_t pos) device
+ {
+ return elements[pos];
+ }
+ constexpr const device T& operator [] (size_t pos) const device
+ {
+ return elements[pos];
+ }
+
+ constexpr const constant T& operator [] (size_t pos) const constant
+ {
+ return elements[pos];
+ }
+
+ threadgroup T& operator [] (size_t pos) threadgroup
+ {
+ return elements[pos];
+ }
+ constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+ {
+ return elements[pos];
+ }
+};
+
+struct main0_out
+{
+ spvUnsafeArray<float4, 2> Foo;
+ float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+ spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+ spvUnsafeArray<float4, 2> iFoo;
+ float4 ipFoo;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
+{
+ device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+ device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+ device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
+ uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+ uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+ gl_out[gl_InvocationID].gl_Position = float4(1.0);
+ gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo;
+ if (gl_InvocationID == 0)
+ {
+ spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+ patchOut.pFoo = _56;
+ }
+}
+
--- /dev/null
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+ T elements[Num ? Num : 1];
+
+ thread T& operator [] (size_t pos) thread
+ {
+ return elements[pos];
+ }
+ constexpr const thread T& operator [] (size_t pos) const thread
+ {
+ return elements[pos];
+ }
+
+ device T& operator [] (size_t pos) device
+ {
+ return elements[pos];
+ }
+ constexpr const device T& operator [] (size_t pos) const device
+ {
+ return elements[pos];
+ }
+
+ constexpr const constant T& operator [] (size_t pos) const constant
+ {
+ return elements[pos];
+ }
+
+ threadgroup T& operator [] (size_t pos) threadgroup
+ {
+ return elements[pos];
+ }
+ constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+ {
+ return elements[pos];
+ }
+};
+
+struct main0_out
+{
+ spvUnsafeArray<float4, 2> Foo;
+ float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+ spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+ float4 iFoo_0 [[attribute(0)]];
+ float4 iFoo_1 [[attribute(1)]];
+ float4 ipFoo [[attribute(2)]];
+};
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+ device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+ device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+ if (gl_InvocationID < spvIndirectParams[0])
+ gl_in[gl_InvocationID] = in;
+ threadgroup_barrier(mem_flags::mem_threadgroup);
+ if (gl_InvocationID >= 4)
+ return;
+ gl_out[gl_InvocationID].gl_Position = float4(1.0);
+ spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
+ gl_out[gl_InvocationID].Foo = _38;
+ if (gl_InvocationID == 0)
+ {
+ spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+ patchOut.pFoo = _56;
+ }
+}
+
--- /dev/null
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+ T elements[Num ? Num : 1];
+
+ thread T& operator [] (size_t pos) thread
+ {
+ return elements[pos];
+ }
+ constexpr const thread T& operator [] (size_t pos) const thread
+ {
+ return elements[pos];
+ }
+
+ device T& operator [] (size_t pos) device
+ {
+ return elements[pos];
+ }
+ constexpr const device T& operator [] (size_t pos) const device
+ {
+ return elements[pos];
+ }
+
+ constexpr const constant T& operator [] (size_t pos) const constant
+ {
+ return elements[pos];
+ }
+
+ threadgroup T& operator [] (size_t pos) threadgroup
+ {
+ return elements[pos];
+ }
+ constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+ {
+ return elements[pos];
+ }
+};
+
+struct main0_out
+{
+ float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+ spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+ spvUnsafeArray<float4, 2> iFoo;
+ float4 ipFoo;
+};
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
+{
+ device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+ threadgroup float4 spvStorageFoo[8][4][2];
+ threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8];
+ device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+ device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
+ uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+ uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+ gl_out[gl_InvocationID].gl_Position = float4(1.0);
+ spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements);
+ if (gl_InvocationID == 0)
+ {
+ spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+ patchOut.pFoo = _56;
+ }
+}
+
--- /dev/null
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+ T elements[Num ? Num : 1];
+
+ thread T& operator [] (size_t pos) thread
+ {
+ return elements[pos];
+ }
+ constexpr const thread T& operator [] (size_t pos) const thread
+ {
+ return elements[pos];
+ }
+
+ device T& operator [] (size_t pos) device
+ {
+ return elements[pos];
+ }
+ constexpr const device T& operator [] (size_t pos) const device
+ {
+ return elements[pos];
+ }
+
+ constexpr const constant T& operator [] (size_t pos) const constant
+ {
+ return elements[pos];
+ }
+
+ threadgroup T& operator [] (size_t pos) threadgroup
+ {
+ return elements[pos];
+ }
+ constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+ {
+ return elements[pos];
+ }
+};
+
+struct main0_out
+{
+ float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+ spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+ float4 iFoo_0 [[attribute(0)]];
+ float4 iFoo_1 [[attribute(1)]];
+ float4 ipFoo [[attribute(2)]];
+};
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+template<typename T, uint A>
+inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A])
+{
+ for (uint i = 0; i < A; i++)
+ {
+ dst[i] = src[i];
+ }
+}
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+ threadgroup float4 Foo[4][2];
+ device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+ device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+ if (gl_InvocationID < spvIndirectParams[0])
+ gl_in[gl_InvocationID] = in;
+ threadgroup_barrier(mem_flags::mem_threadgroup);
+ if (gl_InvocationID >= 4)
+ return;
+ gl_out[gl_InvocationID].gl_Position = float4(1.0);
+ spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
+ spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements);
+ if (gl_InvocationID == 0)
+ {
+ spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+ patchOut.pFoo = _56;
+ }
+}
+
--- /dev/null
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+ T elements[Num ? Num : 1];
+
+ thread T& operator [] (size_t pos) thread
+ {
+ return elements[pos];
+ }
+ constexpr const thread T& operator [] (size_t pos) const thread
+ {
+ return elements[pos];
+ }
+
+ device T& operator [] (size_t pos) device
+ {
+ return elements[pos];
+ }
+ constexpr const device T& operator [] (size_t pos) const device
+ {
+ return elements[pos];
+ }
+
+ constexpr const constant T& operator [] (size_t pos) const constant
+ {
+ return elements[pos];
+ }
+
+ threadgroup T& operator [] (size_t pos) threadgroup
+ {
+ return elements[pos];
+ }
+ constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+ {
+ return elements[pos];
+ }
+};
+
+struct main0_out
+{
+ spvUnsafeArray<float4, 2> Foo;
+ float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+ spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+ spvUnsafeArray<float4, 2> iFoo;
+ float4 ipFoo;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]])
+{
+ device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+ device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+ device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]];
+ uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+ uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+ gl_out[gl_InvocationID].gl_Position = float4(1.0);
+ gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo;
+ if (gl_InvocationID == 0)
+ {
+ spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+ patchOut.pFoo = _56;
+ }
+}
+
--- /dev/null
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+ T elements[Num ? Num : 1];
+
+ thread T& operator [] (size_t pos) thread
+ {
+ return elements[pos];
+ }
+ constexpr const thread T& operator [] (size_t pos) const thread
+ {
+ return elements[pos];
+ }
+
+ device T& operator [] (size_t pos) device
+ {
+ return elements[pos];
+ }
+ constexpr const device T& operator [] (size_t pos) const device
+ {
+ return elements[pos];
+ }
+
+ constexpr const constant T& operator [] (size_t pos) const constant
+ {
+ return elements[pos];
+ }
+
+ threadgroup T& operator [] (size_t pos) threadgroup
+ {
+ return elements[pos];
+ }
+ constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+ {
+ return elements[pos];
+ }
+};
+
+struct main0_out
+{
+ spvUnsafeArray<float4, 2> Foo;
+ float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+ spvUnsafeArray<float4, 2> pFoo;
+};
+
+struct main0_in
+{
+ float4 iFoo_0 [[attribute(0)]];
+ float4 iFoo_1 [[attribute(1)]];
+ float4 ipFoo [[attribute(2)]];
+};
+
+kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]])
+{
+ device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4];
+ device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID];
+ if (gl_InvocationID < spvIndirectParams[0])
+ gl_in[gl_InvocationID] = in;
+ threadgroup_barrier(mem_flags::mem_threadgroup);
+ if (gl_InvocationID >= 4)
+ return;
+ gl_out[gl_InvocationID].gl_Position = float4(1.0);
+ spvUnsafeArray<float4, 2> _38 = spvUnsafeArray<float4, 2>({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 });
+ gl_out[gl_InvocationID].Foo = _38;
+ if (gl_InvocationID == 0)
+ {
+ spvUnsafeArray<float4, 2> _56 = spvUnsafeArray<float4, 2>({ gl_in[0].ipFoo, gl_in[1].ipFoo });
+ patchOut.pFoo = _56;
+ }
+}
+
--- /dev/null
+#version 450
+
+layout(vertices = 4) out;
+layout(location = 0) out vec4 Foo[][2];
+layout(location = 0) in vec4 iFoo[][2];
+layout(location = 2) patch out vec4 pFoo[2];
+layout(location = 2) in vec4 ipFoo[];
+
+void main()
+{
+ gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+ Foo[gl_InvocationID] = iFoo[gl_InvocationID];
+ if (gl_InvocationID == 0)
+ {
+ pFoo = vec4[](ipFoo[0], ipFoo[1]);
+ }
+}
--- /dev/null
+#version 450
+
+layout(vertices = 4) out;
+layout(location = 0) out vec4 Foo[][2];
+layout(location = 0) in vec4 iFoo[][2];
+layout(location = 2) patch out vec4 pFoo[2];
+layout(location = 2) in vec4 ipFoo[];
+
+void main()
+{
+ gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+ Foo[gl_InvocationID] = iFoo[gl_InvocationID];
+ if (gl_InvocationID == 0)
+ {
+ pFoo = vec4[](ipFoo[0], ipFoo[1]);
+ }
+}
--- /dev/null
+#version 450
+
+layout(vertices = 4) out;
+layout(location = 0) out vec4 Foo[][2];
+layout(location = 0) in vec4 iFoo[][2];
+layout(location = 2) patch out vec4 pFoo[2];
+layout(location = 2) in vec4 ipFoo[];
+
+void main()
+{
+ gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+ Foo[gl_InvocationID] = iFoo[gl_InvocationID];
+ if (gl_InvocationID == 0)
+ {
+ pFoo = vec4[](ipFoo[0], ipFoo[1]);
+ }
+}
--- /dev/null
+#version 450
+
+layout(vertices = 4) out;
+layout(location = 0) out vec4 Foo[][2];
+layout(location = 0) in vec4 iFoo[][2];
+layout(location = 2) patch out vec4 pFoo[2];
+layout(location = 2) in vec4 ipFoo[];
+
+void main()
+{
+ gl_out[gl_InvocationID].gl_Position = vec4(1.0);
+ Foo[gl_InvocationID] = iFoo[gl_InvocationID];
+ if (gl_InvocationID == 0)
+ {
+ pFoo = vec4[](ipFoo[0], ipFoo[1]);
+ }
+}
return var;
}
-StorageClass Compiler::get_expression_effective_storage_class(uint32_t ptr)
-{
- auto *var = maybe_get_backing_variable(ptr);
-
- // If the expression has been lowered to a temporary, we need to use the Generic storage class.
- // We're looking for the effective storage class of a given expression.
- // An access chain or forwarded OpLoads from such access chains
- // will generally have the storage class of the underlying variable, but if the load was not forwarded
- // we have lost any address space qualifiers.
- bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
- (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
-
- if (var && !forced_temporary)
- {
- // Normalize SSBOs to StorageBuffer here.
- if (var->storage == StorageClassUniform &&
- has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
- return StorageClassStorageBuffer;
- else
- return var->storage;
- }
- else
- return expression_type(ptr).storage;
-}
-
void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
{
auto &e = get<SPIRExpression>(expr);
bool expression_is_lvalue(uint32_t id) const;
bool variable_storage_is_aliased(const SPIRVariable &var);
SPIRVariable *maybe_get_backing_variable(uint32_t chain);
- spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
void register_read(uint32_t expr, uint32_t chain, bool forwarded);
void register_write(uint32_t chain);
bool ignore_potential_sliced_writes = false;
if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
{
- auto *var = maybe_get_backing_variable(base);
- if (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
- effective_storage = StorageClassWorkgroup;
- else if (var && variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
- effective_storage = StorageClassStorageBuffer;
- else if (expression_type(base).pointer)
+ if (expression_type(base).pointer)
effective_storage = get_expression_effective_storage_class(base);
// Special consideration for control points.
// to consider scalar access chains here.
// Cleans up some cases where it's very painful to determine the accurate storage class
// since blocks can be partially masked ...
+ auto *var = maybe_get_backing_variable(base);
if (var && var->storage == StorageClassOutput &&
get_execution_model() == ExecutionModelTessellationControl &&
!has_decoration(var->self, DecorationPatch))
// it is an array, and our backend does not support arrays as value types.
// Emit the temporary, and copy it explicitly.
e = &emit_uninitialized_temporary_expression(result_type, id);
- emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
+ emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
}
else
e = &emit_op(result_type, id, expr, forward, !usage_tracking);
// The backend is responsible for setting this up, and redirection the return values as appropriate.
if (ir.ids[block.return_value].get_type() != TypeUndef)
{
- emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction,
+ emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
get_expression_effective_storage_class(block.return_value));
}
MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
}
-void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
+void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass)
{
statement(lhs, " = ", to_expression(rhs_id), ";");
}
return location;
}
+
+StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
+{
+ auto *var = maybe_get_backing_variable(ptr);
+
+ // If the expression has been lowered to a temporary, we need to use the Generic storage class.
+ // We're looking for the effective storage class of a given expression.
+ // An access chain or forwarded OpLoads from such access chains
+ // will generally have the storage class of the underlying variable, but if the load was not forwarded
+ // we have lost any address space qualifiers.
+ bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
+ (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
+
+ if (var && !forced_temporary)
+ {
+ if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
+ return StorageClassWorkgroup;
+ if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
+ return StorageClassStorageBuffer;
+
+ // Normalize SSBOs to StorageBuffer here.
+ if (var->storage == StorageClassUniform &&
+ has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+ return StorageClassStorageBuffer;
+ else
+ return var->storage;
+ }
+ else
+ return expression_type(ptr).storage;
+}
std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags,
AccessChainMeta *meta);
+ spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base);
virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
std::string layout_for_variable(const SPIRVariable &variable);
std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id);
virtual bool skip_argument(uint32_t id) const;
- virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
- spv::StorageClass rhs_storage);
+ virtual void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
+ spv::StorageClass lhs_storage, spv::StorageClass rhs_storage);
virtual void emit_block_hints(const SPIRBlock &block);
virtual std::string to_initializer_expression(const SPIRVariable &var);
virtual std::string to_zero_initialized_expression(uint32_t type_id);
flush_all_active_variables();
}
-void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage,
- StorageClass rhs_storage)
+static bool storage_class_array_is_thread(StorageClass storage)
+{
+ switch (storage)
+ {
+ case StorageClassInput:
+ case StorageClassOutput:
+ case StorageClassGeneric:
+ case StorageClassFunction:
+ case StorageClassPrivate:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t rhs_id,
+ StorageClass lhs_storage, StorageClass rhs_storage)
{
// Allow Metal to use the array<T> template to make arrays a value type.
// This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback.
- bool lhs_thread = (lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction ||
- lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate);
- bool rhs_thread = (rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction ||
- rhs_storage == StorageClassOutput ||
- rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate);
+ bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage);
+ bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage);
+
+ bool lhs_is_array_template = lhs_is_thread_storage;
+ bool rhs_is_array_template = rhs_is_thread_storage;
+
+ // Special considerations for stage IO variables.
+ // If the variable is actually backed by non-user visible device storage, we use array templates for those.
+ auto *lhs_var = maybe_get_backing_variable(lhs_id);
+ if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
+ lhs_is_array_template = true;
+
+ auto *rhs_var = maybe_get_backing_variable(rhs_id);
+ if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
+ rhs_is_array_template = true;
// If threadgroup storage qualifiers are *not* used:
// Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier.
- if (lhs_thread && rhs_thread && !using_builtin_array())
+ if (lhs_is_array_template && rhs_is_array_template && !using_builtin_array())
{
statement(lhs, " = ", to_expression(rhs_id), ";");
}
add_spv_func_and_recompile(SPVFuncImplArrayCopy);
const char *tag = nullptr;
- if (lhs_thread && is_constant)
+ if (lhs_is_thread_storage && is_constant)
tag = "FromConstantToStack";
else if (lhs_storage == StorageClassWorkgroup && is_constant)
tag = "FromConstantToThreadGroup";
- else if (lhs_thread && rhs_thread)
+ else if (lhs_is_thread_storage && rhs_is_thread_storage)
tag = "FromStackToStack";
- else if (lhs_storage == StorageClassWorkgroup && rhs_thread)
+ else if (lhs_storage == StorageClassWorkgroup && rhs_is_thread_storage)
tag = "FromStackToThreadGroup";
- else if (lhs_thread && rhs_storage == StorageClassWorkgroup)
+ else if (lhs_is_thread_storage && rhs_storage == StorageClassWorkgroup)
tag = "FromThreadGroupToStack";
else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup)
tag = "FromThreadGroupToThreadGroup";
tag = "FromConstantToDevice";
else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup)
tag = "FromThreadGroupToDevice";
- else if (lhs_storage == StorageClassStorageBuffer && rhs_thread)
+ else if (lhs_storage == StorageClassStorageBuffer && rhs_is_thread_storage)
tag = "FromStackToDevice";
else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer)
tag = "FromDeviceToThreadGroup";
- else if (lhs_thread && rhs_storage == StorageClassStorageBuffer)
+ else if (lhs_is_thread_storage && rhs_storage == StorageClassStorageBuffer)
tag = "FromDeviceToStack";
else
SPIRV_CROSS_THROW("Unknown storage class used for copying arrays.");
// Pass internal array of spvUnsafeArray<> into wrapper functions
- if (lhs_thread && !msl_options.force_native_arrays)
+ if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays)
+ statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);");
+ if (lhs_is_array_template && !msl_options.force_native_arrays)
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");");
- else if (rhs_thread && !msl_options.force_native_arrays)
+ else if (rhs_is_array_template && !msl_options.force_native_arrays)
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);");
else
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");");
if (p_v_lhs)
flush_variable_declaration(p_v_lhs->self);
- emit_array_copy(to_expression(id_lhs), id_rhs, get_expression_effective_storage_class(id_lhs),
- get_expression_effective_storage_class(id_rhs));
+ auto lhs_storage = get_expression_effective_storage_class(id_lhs);
+ auto rhs_storage = get_expression_effective_storage_class(id_rhs);
+ emit_array_copy(to_expression(id_lhs), id_lhs, id_rhs, lhs_storage, rhs_storage);
register_write(id_lhs);
return true;
// This is fine, as there cannot be concurrent writers to that memory anyways,
// so we just ignore that case.
- return capture_output_to_buffer &&
- variable.storage == StorageClassOutput &&
- !is_stage_output_variable_masked(variable);
+ return (variable.storage == StorageClassOutput || variable.storage == StorageClassInput) &&
+ !variable_storage_requires_stage_io(variable.storage) &&
+ (variable.storage != StorageClassOutput || !is_stage_output_variable_masked(variable));
}
else
{
void add_pragma_line(const std::string &line);
void add_typedef_line(const std::string &line);
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
- void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
- spv::StorageClass rhs_storage) override;
+ void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id,
+ spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override;
void build_implicit_builtins();
uint32_t build_constant_uint_array_pointer();
void emit_entry_point_declarations() override;