From ae9ca7d73c6a0ed69167f23584345fab2744e043 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 19 Apr 2021 11:46:30 +0200 Subject: [PATCH] MSL: Fix copy of arrays to/from stage IO variables. Need to take into account effective storage classes and whether or not we target stage IO blocks since native arrays are conditionally enabled. --- ...py-arrays.mask-location-0.msl2.multi-patch.tesc | 188 ++++++++++++++++++++ .../masking/copy-arrays.mask-location-0.msl2.tesc | 191 +++++++++++++++++++++ ...py-arrays.mask-location-1.msl2.multi-patch.tesc | 79 +++++++++ .../masking/copy-arrays.mask-location-1.msl2.tesc | 83 +++++++++ ...py-arrays.mask-location-0.msl2.multi-patch.tesc | 188 ++++++++++++++++++++ .../masking/copy-arrays.mask-location-0.msl2.tesc | 191 +++++++++++++++++++++ ...py-arrays.mask-location-1.msl2.multi-patch.tesc | 79 +++++++++ .../masking/copy-arrays.mask-location-1.msl2.tesc | 83 +++++++++ ...py-arrays.mask-location-0.msl2.multi-patch.tesc | 17 ++ .../masking/copy-arrays.mask-location-0.msl2.tesc | 17 ++ ...py-arrays.mask-location-1.msl2.multi-patch.tesc | 17 ++ .../masking/copy-arrays.mask-location-1.msl2.tesc | 17 ++ spirv_cross.cpp | 25 --- spirv_cross.hpp | 1 - spirv_glsl.cpp | 44 ++++- spirv_glsl.hpp | 5 +- spirv_msl.cpp | 71 +++++--- spirv_msl.hpp | 4 +- 18 files changed, 1240 insertions(+), 60 deletions(-) create mode 100644 reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc create mode 100644 reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc create mode 100644 reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc create mode 100644 reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc create mode 100644 reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc create mode 100644 reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc create mode 100644 reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc create mode 100644 reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc create mode 100644 shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc create mode 100644 shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc create mode 100644 shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc create mode 100644 shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc new file mode 100644 index 0000000..4f9134e --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc @@ -0,0 +1,188 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStorageFoo[8][4][2]; + threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc new file mode 100644 index 0000000..e9dd68d --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc @@ -0,0 +1,191 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup float4 Foo[4][2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc new file mode 100644 index 0000000..a2ad010 --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc new file mode 100644 index 0000000..3da1d18 --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + gl_out[gl_InvocationID].Foo = _38; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc new file mode 100644 index 0000000..4f9134e --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc @@ -0,0 +1,188 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStorageFoo[8][4][2]; + threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc new file mode 100644 index 0000000..e9dd68d --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc @@ -0,0 +1,191 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup float4 Foo[4][2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc new file mode 100644 index 0000000..a2ad010 --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc new file mode 100644 index 0000000..3da1d18 --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + gl_out[gl_InvocationID].Foo = _38; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc new file mode 100644 index 0000000..e69a7a1 --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc new file mode 100644 index 0000000..e69a7a1 --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc new file mode 100644 index 0000000..e69a7a1 --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc new file mode 100644 index 0000000..e69a7a1 --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/spirv_cross.cpp b/spirv_cross.cpp index afd8c50..5bba905 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -284,31 +284,6 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain) return var; } -StorageClass Compiler::get_expression_effective_storage_class(uint32_t ptr) -{ - auto *var = maybe_get_backing_variable(ptr); - - // If the expression has been lowered to a temporary, we need to use the Generic storage class. - // We're looking for the effective storage class of a given expression. - // An access chain or forwarded OpLoads from such access chains - // will generally have the storage class of the underlying variable, but if the load was not forwarded - // we have lost any address space qualifiers. - bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get(ptr).access_chain && - (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0); - - if (var && !forced_temporary) - { - // Normalize SSBOs to StorageBuffer here. - if (var->storage == StorageClassUniform && - has_decoration(get(var->basetype).self, DecorationBufferBlock)) - return StorageClassStorageBuffer; - else - return var->storage; - } - else - return expression_type(ptr).storage; -} - void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) { auto &e = get(expr); diff --git a/spirv_cross.hpp b/spirv_cross.hpp index c603f84..764c6c4 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -671,7 +671,6 @@ protected: bool expression_is_lvalue(uint32_t id) const; bool variable_storage_is_aliased(const SPIRVariable &var); SPIRVariable *maybe_get_backing_variable(uint32_t chain); - spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); void register_read(uint32_t expr, uint32_t chain, bool forwarded); void register_write(uint32_t chain); diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index d987f78..5acbf84 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -8661,12 +8661,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice bool ignore_potential_sliced_writes = false; if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0) { - auto *var = maybe_get_backing_variable(base); - if (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) - effective_storage = StorageClassWorkgroup; - else if (var && variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer)) - effective_storage = StorageClassStorageBuffer; - else if (expression_type(base).pointer) + if (expression_type(base).pointer) effective_storage = get_expression_effective_storage_class(base); // Special consideration for control points. @@ -8674,6 +8669,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice // to consider scalar access chains here. // Cleans up some cases where it's very painful to determine the accurate storage class // since blocks can be partially masked ... + auto *var = maybe_get_backing_variable(base); if (var && var->storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationControl && !has_decoration(var->self, DecorationPatch)) @@ -9911,7 +9907,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // it is an array, and our backend does not support arrays as value types. // Emit the temporary, and copy it explicitly. e = &emit_uninitialized_temporary_expression(result_type, id); - emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr)); + emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr)); } else e = &emit_op(result_type, id, expr, forward, !usage_tracking); @@ -14794,7 +14790,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // The backend is responsible for setting this up, and redirection the return values as appropriate. if (ir.ids[block.return_value].get_type() != TypeUndef) { - emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction, + emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction, get_expression_effective_storage_class(block.return_value)); } @@ -15013,7 +15009,7 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); } -void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass) +void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass) { statement(lhs, " = ", to_expression(rhs_id), ";"); } @@ -15923,3 +15919,33 @@ uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, return location; } + +StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr) +{ + auto *var = maybe_get_backing_variable(ptr); + + // If the expression has been lowered to a temporary, we need to use the Generic storage class. + // We're looking for the effective storage class of a given expression. + // An access chain or forwarded OpLoads from such access chains + // will generally have the storage class of the underlying variable, but if the load was not forwarded + // we have lost any address space qualifiers. + bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get(ptr).access_chain && + (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0); + + if (var && !forced_temporary) + { + if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) + return StorageClassWorkgroup; + if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer)) + return StorageClassStorageBuffer; + + // Normalize SSBOs to StorageBuffer here. + if (var->storage == StorageClassUniform && + has_decoration(get(var->basetype).self, DecorationBufferBlock)) + return StorageClassStorageBuffer; + else + return var->storage; + } + else + return expression_type(ptr).storage; +} diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index b6d16d3..ae01f5c 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -673,6 +673,7 @@ protected: std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta); + spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base); virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, @@ -739,8 +740,8 @@ protected: std::string layout_for_variable(const SPIRVariable &variable); std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id); virtual bool skip_argument(uint32_t id) const; - virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage, - spv::StorageClass rhs_storage); + virtual void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id, + spv::StorageClass lhs_storage, spv::StorageClass rhs_storage); virtual void emit_block_hints(const SPIRBlock &block); virtual std::string to_initializer_expression(const SPIRVariable &var); virtual std::string to_zero_initialized_expression(uint32_t type_id); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index cc42686..f904319 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -8397,20 +8397,46 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin flush_all_active_variables(); } -void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage, - StorageClass rhs_storage) +static bool storage_class_array_is_thread(StorageClass storage) +{ + switch (storage) + { + case StorageClassInput: + case StorageClassOutput: + case StorageClassGeneric: + case StorageClassFunction: + case StorageClassPrivate: + return true; + + default: + return false; + } +} + +void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t rhs_id, + StorageClass lhs_storage, StorageClass rhs_storage) { // Allow Metal to use the array template to make arrays a value type. // This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback. - bool lhs_thread = (lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction || - lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate); - bool rhs_thread = (rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction || - rhs_storage == StorageClassOutput || - rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate); + bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage); + bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage); + + bool lhs_is_array_template = lhs_is_thread_storage; + bool rhs_is_array_template = rhs_is_thread_storage; + + // Special considerations for stage IO variables. + // If the variable is actually backed by non-user visible device storage, we use array templates for those. + auto *lhs_var = maybe_get_backing_variable(lhs_id); + if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage)) + lhs_is_array_template = true; + + auto *rhs_var = maybe_get_backing_variable(rhs_id); + if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage)) + rhs_is_array_template = true; // If threadgroup storage qualifiers are *not* used: // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier. - if (lhs_thread && rhs_thread && !using_builtin_array()) + if (lhs_is_array_template && rhs_is_array_template && !using_builtin_array()) { statement(lhs, " = ", to_expression(rhs_id), ";"); } @@ -8452,15 +8478,15 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla add_spv_func_and_recompile(SPVFuncImplArrayCopy); const char *tag = nullptr; - if (lhs_thread && is_constant) + if (lhs_is_thread_storage && is_constant) tag = "FromConstantToStack"; else if (lhs_storage == StorageClassWorkgroup && is_constant) tag = "FromConstantToThreadGroup"; - else if (lhs_thread && rhs_thread) + else if (lhs_is_thread_storage && rhs_is_thread_storage) tag = "FromStackToStack"; - else if (lhs_storage == StorageClassWorkgroup && rhs_thread) + else if (lhs_storage == StorageClassWorkgroup && rhs_is_thread_storage) tag = "FromStackToThreadGroup"; - else if (lhs_thread && rhs_storage == StorageClassWorkgroup) + else if (lhs_is_thread_storage && rhs_storage == StorageClassWorkgroup) tag = "FromThreadGroupToStack"; else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup) tag = "FromThreadGroupToThreadGroup"; @@ -8470,19 +8496,21 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla tag = "FromConstantToDevice"; else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup) tag = "FromThreadGroupToDevice"; - else if (lhs_storage == StorageClassStorageBuffer && rhs_thread) + else if (lhs_storage == StorageClassStorageBuffer && rhs_is_thread_storage) tag = "FromStackToDevice"; else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer) tag = "FromDeviceToThreadGroup"; - else if (lhs_thread && rhs_storage == StorageClassStorageBuffer) + else if (lhs_is_thread_storage && rhs_storage == StorageClassStorageBuffer) tag = "FromDeviceToStack"; else SPIRV_CROSS_THROW("Unknown storage class used for copying arrays."); // Pass internal array of spvUnsafeArray<> into wrapper functions - if (lhs_thread && !msl_options.force_native_arrays) + if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);"); + if (lhs_is_array_template && !msl_options.force_native_arrays) statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");"); - else if (rhs_thread && !msl_options.force_native_arrays) + else if (rhs_is_array_template && !msl_options.force_native_arrays) statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);"); else statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); @@ -8549,8 +8577,9 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs) if (p_v_lhs) flush_variable_declaration(p_v_lhs->self); - emit_array_copy(to_expression(id_lhs), id_rhs, get_expression_effective_storage_class(id_lhs), - get_expression_effective_storage_class(id_rhs)); + auto lhs_storage = get_expression_effective_storage_class(id_lhs); + auto rhs_storage = get_expression_effective_storage_class(id_rhs); + emit_array_copy(to_expression(id_lhs), id_lhs, id_rhs, lhs_storage, rhs_storage); register_write(id_lhs); return true; @@ -13242,9 +13271,9 @@ bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable // This is fine, as there cannot be concurrent writers to that memory anyways, // so we just ignore that case. - return capture_output_to_buffer && - variable.storage == StorageClassOutput && - !is_stage_output_variable_masked(variable); + return (variable.storage == StorageClassOutput || variable.storage == StorageClassInput) && + !variable_storage_requires_stage_io(variable.storage) && + (variable.storage != StorageClassOutput || !is_stage_output_variable_masked(variable)); } else { diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 65d596d..97800ab 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -913,8 +913,8 @@ protected: void add_pragma_line(const std::string &line); void add_typedef_line(const std::string &line); void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); - void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage, - spv::StorageClass rhs_storage) override; + void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id, + spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override; void build_implicit_builtins(); uint32_t build_constant_uint_array_pointer(); void emit_entry_point_declarations() override; -- 2.7.4