3 #extension GL_KHR_shader_subgroup_arithmetic: enable
\r
5 layout (local_size_x = 8) in;
\r
7 layout(binding = 0) buffer Buffers
\r
16 uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4u;
\r
18 data[0].f4.x = subgroupAdd(data[0].f4.x);
\r
19 data[0].f4.xy = subgroupAdd(data[1].f4.xy);
\r
20 data[0].f4.xyz = subgroupAdd(data[2].f4.xyz);
\r
21 data[0].f4 = subgroupAdd(data[3].f4);
\r
23 data[1].i4.x = subgroupAdd(data[0].i4.x);
\r
24 data[1].i4.xy = subgroupAdd(data[1].i4.xy);
\r
25 data[1].i4.xyz = subgroupAdd(data[2].i4.xyz);
\r
26 data[1].i4 = subgroupAdd(data[3].i4);
\r
28 data[2].u4.x = subgroupAdd(data[0].u4.x);
\r
29 data[2].u4.xy = subgroupAdd(data[1].u4.xy);
\r
30 data[2].u4.xyz = subgroupAdd(data[2].u4.xyz);
\r
31 data[2].u4 = subgroupAdd(data[3].u4);
\r
33 data[3].f4.x = subgroupMul(data[0].f4.x);
\r
34 data[3].f4.xy = subgroupMul(data[1].f4.xy);
\r
35 data[3].f4.xyz = subgroupMul(data[2].f4.xyz);
\r
36 data[3].f4 = subgroupMul(data[3].f4);
\r
38 data[0].i4.x = subgroupMul(data[0].i4.x);
\r
39 data[0].i4.xy = subgroupMul(data[1].i4.xy);
\r
40 data[0].i4.xyz = subgroupMul(data[2].i4.xyz);
\r
41 data[0].i4 = subgroupMul(data[3].i4);
\r
43 data[1].u4.x = subgroupMul(data[0].u4.x);
\r
44 data[1].u4.xy = subgroupMul(data[1].u4.xy);
\r
45 data[1].u4.xyz = subgroupMul(data[2].u4.xyz);
\r
46 data[1].u4 = subgroupMul(data[3].u4);
\r
48 data[2].f4.x = subgroupMin(data[0].f4.x);
\r
49 data[2].f4.xy = subgroupMin(data[1].f4.xy);
\r
50 data[2].f4.xyz = subgroupMin(data[2].f4.xyz);
\r
51 data[2].f4 = subgroupMin(data[3].f4);
\r
53 data[3].i4.x = subgroupMin(data[0].i4.x);
\r
54 data[3].i4.xy = subgroupMin(data[1].i4.xy);
\r
55 data[3].i4.xyz = subgroupMin(data[2].i4.xyz);
\r
56 data[3].i4 = subgroupMin(data[3].i4);
\r
58 data[0].u4.x = subgroupMin(data[0].u4.x);
\r
59 data[0].u4.xy = subgroupMin(data[1].u4.xy);
\r
60 data[0].u4.xyz = subgroupMin(data[2].u4.xyz);
\r
61 data[0].u4 = subgroupMin(data[3].u4);
\r
63 data[1].f4.x = subgroupMax(data[0].f4.x);
\r
64 data[1].f4.xy = subgroupMax(data[1].f4.xy);
\r
65 data[1].f4.xyz = subgroupMax(data[2].f4.xyz);
\r
66 data[1].f4 = subgroupMax(data[3].f4);
\r
68 data[2].i4.x = subgroupMax(data[0].i4.x);
\r
69 data[2].i4.xy = subgroupMax(data[1].i4.xy);
\r
70 data[2].i4.xyz = subgroupMax(data[2].i4.xyz);
\r
71 data[2].i4 = subgroupMax(data[3].i4);
\r
73 data[3].u4.x = subgroupMax(data[0].u4.x);
\r
74 data[3].u4.xy = subgroupMax(data[1].u4.xy);
\r
75 data[3].u4.xyz = subgroupMax(data[2].u4.xyz);
\r
76 data[3].u4 = subgroupMax(data[3].u4);
\r
78 data[0].i4.x = subgroupAnd(data[0].i4.x);
\r
79 data[0].i4.xy = subgroupAnd(data[1].i4.xy);
\r
80 data[0].i4.xyz = subgroupAnd(data[2].i4.xyz);
\r
81 data[0].i4 = subgroupAnd(data[3].i4);
\r
83 data[1].u4.x = subgroupAnd(data[0].u4.x);
\r
84 data[1].u4.xy = subgroupAnd(data[1].u4.xy);
\r
85 data[1].u4.xyz = subgroupAnd(data[2].u4.xyz);
\r
86 data[1].u4 = subgroupAnd(data[3].u4);
\r
88 data[2].i4.x = int(subgroupAnd(data[0].i4.x < 0));
\r
89 data[2].i4.xy = ivec2(subgroupAnd(lessThan(data[1].i4.xy, ivec2(0))));
\r
90 data[2].i4.xyz = ivec3(subgroupAnd(lessThan(data[1].i4.xyz, ivec3(0))));
\r
91 data[2].i4 = ivec4(subgroupAnd(lessThan(data[1].i4, ivec4(0))));
\r
93 data[3].i4.x = subgroupOr(data[0].i4.x);
\r
94 data[3].i4.xy = subgroupOr(data[1].i4.xy);
\r
95 data[3].i4.xyz = subgroupOr(data[2].i4.xyz);
\r
96 data[3].i4 = subgroupOr(data[3].i4);
\r
98 data[0].u4.x = subgroupOr(data[0].u4.x);
\r
99 data[0].u4.xy = subgroupOr(data[1].u4.xy);
\r
100 data[0].u4.xyz = subgroupOr(data[2].u4.xyz);
\r
101 data[0].u4 = subgroupOr(data[3].u4);
\r
103 data[1].i4.x = int(subgroupOr(data[0].i4.x < 0));
\r
104 data[1].i4.xy = ivec2(subgroupOr(lessThan(data[1].i4.xy, ivec2(0))));
\r
105 data[1].i4.xyz = ivec3(subgroupOr(lessThan(data[1].i4.xyz, ivec3(0))));
\r
106 data[1].i4 = ivec4(subgroupOr(lessThan(data[1].i4, ivec4(0))));
\r
108 data[2].i4.x = subgroupXor(data[0].i4.x);
\r
109 data[2].i4.xy = subgroupXor(data[1].i4.xy);
\r
110 data[2].i4.xyz = subgroupXor(data[2].i4.xyz);
\r
111 data[2].i4 = subgroupXor(data[3].i4);
\r
113 data[3].u4.x = subgroupXor(data[0].u4.x);
\r
114 data[3].u4.xy = subgroupXor(data[1].u4.xy);
\r
115 data[3].u4.xyz = subgroupXor(data[2].u4.xyz);
\r
116 data[3].u4 = subgroupXor(data[3].u4);
\r
118 data[0].i4.x = int(subgroupXor(data[0].i4.x < 0));
\r
119 data[0].i4.xy = ivec2(subgroupXor(lessThan(data[1].i4.xy, ivec2(0))));
\r
120 data[0].i4.xyz = ivec3(subgroupXor(lessThan(data[1].i4.xyz, ivec3(0))));
\r
121 data[0].i4 = ivec4(subgroupXor(lessThan(data[1].i4, ivec4(0))));
\r
123 data[1].f4.x = subgroupInclusiveAdd(data[0].f4.x);
\r
124 data[1].f4.xy = subgroupInclusiveAdd(data[1].f4.xy);
\r
125 data[1].f4.xyz = subgroupInclusiveAdd(data[2].f4.xyz);
\r
126 data[1].f4 = subgroupInclusiveAdd(data[3].f4);
\r
128 data[2].i4.x = subgroupInclusiveAdd(data[0].i4.x);
\r
129 data[2].i4.xy = subgroupInclusiveAdd(data[1].i4.xy);
\r
130 data[2].i4.xyz = subgroupInclusiveAdd(data[2].i4.xyz);
\r
131 data[2].i4 = subgroupInclusiveAdd(data[3].i4);
\r
133 data[3].u4.x = subgroupInclusiveAdd(data[0].u4.x);
\r
134 data[3].u4.xy = subgroupInclusiveAdd(data[1].u4.xy);
\r
135 data[3].u4.xyz = subgroupInclusiveAdd(data[2].u4.xyz);
\r
136 data[3].u4 = subgroupInclusiveAdd(data[3].u4);
\r
138 data[0].f4.x = subgroupInclusiveMul(data[0].f4.x);
\r
139 data[0].f4.xy = subgroupInclusiveMul(data[1].f4.xy);
\r
140 data[0].f4.xyz = subgroupInclusiveMul(data[2].f4.xyz);
\r
141 data[0].f4 = subgroupInclusiveMul(data[3].f4);
\r
143 data[1].i4.x = subgroupInclusiveMul(data[0].i4.x);
\r
144 data[1].i4.xy = subgroupInclusiveMul(data[1].i4.xy);
\r
145 data[1].i4.xyz = subgroupInclusiveMul(data[2].i4.xyz);
\r
146 data[1].i4 = subgroupInclusiveMul(data[3].i4);
\r
148 data[2].u4.x = subgroupInclusiveMul(data[0].u4.x);
\r
149 data[2].u4.xy = subgroupInclusiveMul(data[1].u4.xy);
\r
150 data[2].u4.xyz = subgroupInclusiveMul(data[2].u4.xyz);
\r
151 data[2].u4 = subgroupInclusiveMul(data[3].u4);
\r
153 data[3].f4.x = subgroupInclusiveMin(data[0].f4.x);
\r
154 data[3].f4.xy = subgroupInclusiveMin(data[1].f4.xy);
\r
155 data[3].f4.xyz = subgroupInclusiveMin(data[2].f4.xyz);
\r
156 data[3].f4 = subgroupInclusiveMin(data[3].f4);
\r
158 data[0].i4.x = subgroupInclusiveMin(data[0].i4.x);
\r
159 data[0].i4.xy = subgroupInclusiveMin(data[1].i4.xy);
\r
160 data[0].i4.xyz = subgroupInclusiveMin(data[2].i4.xyz);
\r
161 data[0].i4 = subgroupInclusiveMin(data[3].i4);
\r
163 data[1].u4.x = subgroupInclusiveMin(data[0].u4.x);
\r
164 data[1].u4.xy = subgroupInclusiveMin(data[1].u4.xy);
\r
165 data[1].u4.xyz = subgroupInclusiveMin(data[2].u4.xyz);
\r
166 data[1].u4 = subgroupInclusiveMin(data[3].u4);
\r
168 data[2].f4.x = subgroupInclusiveMax(data[0].f4.x);
\r
169 data[2].f4.xy = subgroupInclusiveMax(data[1].f4.xy);
\r
170 data[2].f4.xyz = subgroupInclusiveMax(data[2].f4.xyz);
\r
171 data[2].f4 = subgroupInclusiveMax(data[3].f4);
\r
173 data[3].i4.x = subgroupInclusiveMax(data[0].i4.x);
\r
174 data[3].i4.xy = subgroupInclusiveMax(data[1].i4.xy);
\r
175 data[3].i4.xyz = subgroupInclusiveMax(data[2].i4.xyz);
\r
176 data[3].i4 = subgroupInclusiveMax(data[3].i4);
\r
178 data[0].u4.x = subgroupInclusiveMax(data[0].u4.x);
\r
179 data[0].u4.xy = subgroupInclusiveMax(data[1].u4.xy);
\r
180 data[0].u4.xyz = subgroupInclusiveMax(data[2].u4.xyz);
\r
181 data[0].u4 = subgroupInclusiveMax(data[3].u4);
\r
183 data[1].i4.x = subgroupInclusiveAnd(data[0].i4.x);
\r
184 data[1].i4.xy = subgroupInclusiveAnd(data[1].i4.xy);
\r
185 data[1].i4.xyz = subgroupInclusiveAnd(data[2].i4.xyz);
\r
186 data[1].i4 = subgroupInclusiveAnd(data[3].i4);
\r
188 data[2].u4.x = subgroupInclusiveAnd(data[0].u4.x);
\r
189 data[2].u4.xy = subgroupInclusiveAnd(data[1].u4.xy);
\r
190 data[2].u4.xyz = subgroupInclusiveAnd(data[2].u4.xyz);
\r
191 data[2].u4 = subgroupInclusiveAnd(data[3].u4);
\r
193 data[3].i4.x = int(subgroupInclusiveAnd(data[0].i4.x < 0));
\r
194 data[3].i4.xy = ivec2(subgroupInclusiveAnd(lessThan(data[1].i4.xy, ivec2(0))));
\r
195 data[3].i4.xyz = ivec3(subgroupInclusiveAnd(lessThan(data[1].i4.xyz, ivec3(0))));
\r
196 data[3].i4 = ivec4(subgroupInclusiveAnd(lessThan(data[1].i4, ivec4(0))));
\r
198 data[0].i4.x = subgroupInclusiveOr(data[0].i4.x);
\r
199 data[0].i4.xy = subgroupInclusiveOr(data[1].i4.xy);
\r
200 data[0].i4.xyz = subgroupInclusiveOr(data[2].i4.xyz);
\r
201 data[0].i4 = subgroupInclusiveOr(data[3].i4);
\r
203 data[1].u4.x = subgroupInclusiveOr(data[0].u4.x);
\r
204 data[1].u4.xy = subgroupInclusiveOr(data[1].u4.xy);
\r
205 data[1].u4.xyz = subgroupInclusiveOr(data[2].u4.xyz);
\r
206 data[1].u4 = subgroupInclusiveOr(data[3].u4);
\r
208 data[2].i4.x = int(subgroupInclusiveOr(data[0].i4.x < 0));
\r
209 data[2].i4.xy = ivec2(subgroupInclusiveOr(lessThan(data[1].i4.xy, ivec2(0))));
\r
210 data[2].i4.xyz = ivec3(subgroupInclusiveOr(lessThan(data[1].i4.xyz, ivec3(0))));
\r
211 data[2].i4 = ivec4(subgroupInclusiveOr(lessThan(data[1].i4, ivec4(0))));
\r
213 data[3].i4.x = subgroupInclusiveXor(data[0].i4.x);
\r
214 data[3].i4.xy = subgroupInclusiveXor(data[1].i4.xy);
\r
215 data[3].i4.xyz = subgroupInclusiveXor(data[2].i4.xyz);
\r
216 data[3].i4 = subgroupInclusiveXor(data[3].i4);
\r
218 data[0].u4.x = subgroupInclusiveXor(data[0].u4.x);
\r
219 data[0].u4.xy = subgroupInclusiveXor(data[1].u4.xy);
\r
220 data[0].u4.xyz = subgroupInclusiveXor(data[2].u4.xyz);
\r
221 data[0].u4 = subgroupInclusiveXor(data[3].u4);
\r
223 data[1].i4.x = int(subgroupInclusiveXor(data[0].i4.x < 0));
\r
224 data[1].i4.xy = ivec2(subgroupInclusiveXor(lessThan(data[1].i4.xy, ivec2(0))));
\r
225 data[1].i4.xyz = ivec3(subgroupInclusiveXor(lessThan(data[1].i4.xyz, ivec3(0))));
\r
226 data[1].i4 = ivec4(subgroupInclusiveXor(lessThan(data[1].i4, ivec4(0))));
\r
228 data[2].f4.x = subgroupExclusiveAdd(data[0].f4.x);
\r
229 data[2].f4.xy = subgroupExclusiveAdd(data[1].f4.xy);
\r
230 data[2].f4.xyz = subgroupExclusiveAdd(data[2].f4.xyz);
\r
231 data[2].f4 = subgroupExclusiveAdd(data[3].f4);
\r
233 data[3].i4.x = subgroupExclusiveAdd(data[0].i4.x);
\r
234 data[3].i4.xy = subgroupExclusiveAdd(data[1].i4.xy);
\r
235 data[3].i4.xyz = subgroupExclusiveAdd(data[2].i4.xyz);
\r
236 data[3].i4 = subgroupExclusiveAdd(data[3].i4);
\r
238 data[0].u4.x = subgroupExclusiveAdd(data[0].u4.x);
\r
239 data[0].u4.xy = subgroupExclusiveAdd(data[1].u4.xy);
\r
240 data[0].u4.xyz = subgroupExclusiveAdd(data[2].u4.xyz);
\r
241 data[0].u4 = subgroupExclusiveAdd(data[3].u4);
\r
243 data[1].f4.x = subgroupExclusiveMul(data[0].f4.x);
\r
244 data[1].f4.xy = subgroupExclusiveMul(data[1].f4.xy);
\r
245 data[1].f4.xyz = subgroupExclusiveMul(data[2].f4.xyz);
\r
246 data[1].f4 = subgroupExclusiveMul(data[3].f4);
\r
248 data[2].i4.x = subgroupExclusiveMul(data[0].i4.x);
\r
249 data[2].i4.xy = subgroupExclusiveMul(data[1].i4.xy);
\r
250 data[2].i4.xyz = subgroupExclusiveMul(data[2].i4.xyz);
\r
251 data[2].i4 = subgroupExclusiveMul(data[3].i4);
\r
253 data[3].u4.x = subgroupExclusiveMul(data[0].u4.x);
\r
254 data[3].u4.xy = subgroupExclusiveMul(data[1].u4.xy);
\r
255 data[3].u4.xyz = subgroupExclusiveMul(data[2].u4.xyz);
\r
256 data[3].u4 = subgroupExclusiveMul(data[3].u4);
\r
258 data[0].f4.x = subgroupExclusiveMin(data[0].f4.x);
\r
259 data[0].f4.xy = subgroupExclusiveMin(data[1].f4.xy);
\r
260 data[0].f4.xyz = subgroupExclusiveMin(data[2].f4.xyz);
\r
261 data[0].f4 = subgroupExclusiveMin(data[3].f4);
\r
263 data[1].i4.x = subgroupExclusiveMin(data[0].i4.x);
\r
264 data[1].i4.xy = subgroupExclusiveMin(data[1].i4.xy);
\r
265 data[1].i4.xyz = subgroupExclusiveMin(data[2].i4.xyz);
\r
266 data[1].i4 = subgroupExclusiveMin(data[3].i4);
\r
268 data[2].u4.x = subgroupExclusiveMin(data[0].u4.x);
\r
269 data[2].u4.xy = subgroupExclusiveMin(data[1].u4.xy);
\r
270 data[2].u4.xyz = subgroupExclusiveMin(data[2].u4.xyz);
\r
271 data[2].u4 = subgroupExclusiveMin(data[3].u4);
\r
273 data[3].f4.x = subgroupExclusiveMax(data[0].f4.x);
\r
274 data[3].f4.xy = subgroupExclusiveMax(data[1].f4.xy);
\r
275 data[3].f4.xyz = subgroupExclusiveMax(data[2].f4.xyz);
\r
276 data[3].f4 = subgroupExclusiveMax(data[3].f4);
\r
278 data[0].i4.x = subgroupExclusiveMax(data[0].i4.x);
\r
279 data[0].i4.xy = subgroupExclusiveMax(data[1].i4.xy);
\r
280 data[0].i4.xyz = subgroupExclusiveMax(data[2].i4.xyz);
\r
281 data[0].i4 = subgroupExclusiveMax(data[3].i4);
\r
283 data[1].u4.x = subgroupExclusiveMax(data[0].u4.x);
\r
284 data[1].u4.xy = subgroupExclusiveMax(data[1].u4.xy);
\r
285 data[1].u4.xyz = subgroupExclusiveMax(data[2].u4.xyz);
\r
286 data[1].u4 = subgroupExclusiveMax(data[3].u4);
\r
288 data[2].i4.x = subgroupExclusiveAnd(data[0].i4.x);
\r
289 data[2].i4.xy = subgroupExclusiveAnd(data[1].i4.xy);
\r
290 data[2].i4.xyz = subgroupExclusiveAnd(data[2].i4.xyz);
\r
291 data[2].i4 = subgroupExclusiveAnd(data[3].i4);
\r
293 data[3].u4.x = subgroupExclusiveAnd(data[0].u4.x);
\r
294 data[3].u4.xy = subgroupExclusiveAnd(data[1].u4.xy);
\r
295 data[3].u4.xyz = subgroupExclusiveAnd(data[2].u4.xyz);
\r
296 data[3].u4 = subgroupExclusiveAnd(data[3].u4);
\r
298 data[0].i4.x = int(subgroupExclusiveAnd(data[0].i4.x < 0));
\r
299 data[0].i4.xy = ivec2(subgroupExclusiveAnd(lessThan(data[1].i4.xy, ivec2(0))));
\r
300 data[0].i4.xyz = ivec3(subgroupExclusiveAnd(lessThan(data[1].i4.xyz, ivec3(0))));
\r
301 data[0].i4 = ivec4(subgroupExclusiveAnd(lessThan(data[1].i4, ivec4(0))));
\r
303 data[1].i4.x = subgroupExclusiveOr(data[0].i4.x);
\r
304 data[1].i4.xy = subgroupExclusiveOr(data[1].i4.xy);
\r
305 data[1].i4.xyz = subgroupExclusiveOr(data[2].i4.xyz);
\r
306 data[1].i4 = subgroupExclusiveOr(data[3].i4);
\r
308 data[2].u4.x = subgroupExclusiveOr(data[0].u4.x);
\r
309 data[2].u4.xy = subgroupExclusiveOr(data[1].u4.xy);
\r
310 data[2].u4.xyz = subgroupExclusiveOr(data[2].u4.xyz);
\r
311 data[2].u4 = subgroupExclusiveOr(data[3].u4);
\r
313 data[3].i4.x = int(subgroupExclusiveOr(data[0].i4.x < 0));
\r
314 data[3].i4.xy = ivec2(subgroupExclusiveOr(lessThan(data[1].i4.xy, ivec2(0))));
\r
315 data[3].i4.xyz = ivec3(subgroupExclusiveOr(lessThan(data[1].i4.xyz, ivec3(0))));
\r
316 data[3].i4 = ivec4(subgroupExclusiveOr(lessThan(data[1].i4, ivec4(0))));
\r
318 data[0].i4.x = subgroupExclusiveXor(data[0].i4.x);
\r
319 data[0].i4.xy = subgroupExclusiveXor(data[1].i4.xy);
\r
320 data[0].i4.xyz = subgroupExclusiveXor(data[2].i4.xyz);
\r
321 data[0].i4 = subgroupExclusiveXor(data[3].i4);
\r
323 data[1].u4.x = subgroupExclusiveXor(data[0].u4.x);
\r
324 data[1].u4.xy = subgroupExclusiveXor(data[1].u4.xy);
\r
325 data[1].u4.xyz = subgroupExclusiveXor(data[2].u4.xyz);
\r
326 data[1].u4 = subgroupExclusiveXor(data[3].u4);
\r
328 data[2].i4.x = int(subgroupExclusiveXor(data[0].i4.x < 0));
\r
329 data[2].i4.xy = ivec2(subgroupExclusiveXor(lessThan(data[1].i4.xy, ivec2(0))));
\r
330 data[2].i4.xyz = ivec3(subgroupExclusiveXor(lessThan(data[1].i4.xyz, ivec3(0))));
\r
331 data[2].i4 = ivec4(subgroupExclusiveXor(lessThan(data[1].i4, ivec4(0))));
\r