3 #extension GL_AMD_gpu_shader_half_float: enable
\r
4 #extension GL_ARB_gpu_shader_int64: enable
\r
10 // Half float literals
\r
13 const float16_t f16c = 0.000001hf;
\r
14 const f16vec2 f16cv = f16vec2(-0.25HF, 0.03HF);
\r
21 // Block memory layout
\r
24 float16_t x; // rule 1: align = 2, takes offsets 0-1
\r
25 f16vec2 y; // rule 2: align = 4, takes offsets 4-7
\r
26 f16vec3 z; // rule 3: align = 8, takes offsets 8-13
\r
29 layout(column_major, std140) uniform B1
\r
31 float16_t a; // rule 1: align = 2, takes offsets 0-1
\r
32 f16vec2 b; // rule 2: align = 4, takes offsets 4-7
\r
33 f16vec3 c; // rule 3: align = 8, takes offsets 8-13
\r
34 float16_t d[2]; // rule 4: align = 16, array stride = 16,
\r
35 // takes offsets 16-47
\r
36 f16mat2x3 e; // rule 5: align = 16, matrix stride = 16,
\r
37 // takes offsets 48-79
\r
38 f16mat2x3 f[2]; // rule 6: align = 16, matrix stride = 16,
\r
39 // array stride = 32, f[0] takes
\r
40 // offsets 80-111, f[1] takes offsets
\r
42 S g; // rule 9: align = 16, g.x takes offsets
\r
43 // 144-145, g.y takes offsets 148-151,
\r
44 // g.z takes offsets 152-157
\r
45 S h[2]; // rule 10: align = 16, array stride = 16, h[0]
\r
46 // takes offsets 160-175, h[1] takes
\r
50 layout(row_major, std430) buffer B2
\r
52 float16_t o; // rule 1: align = 2, takes offsets 0-1
\r
53 f16vec2 p; // rule 2: align = 4, takes offsets 4-7
\r
54 f16vec3 q; // rule 3: align = 8, takes offsets 8-13
\r
55 float16_t r[2]; // rule 4: align = 2, array stride = 2, takes
\r
57 f16mat2x3 s; // rule 7: align = 4, matrix stride = 4, takes
\r
59 f16mat2x3 t[2]; // rule 8: align = 4, matrix stride = 4, array
\r
60 // stride = 12, t[0] takes offsets
\r
61 // 32-43, t[1] takes offsets 44-55
\r
62 S u; // rule 9: align = 8, u.x takes offsets
\r
63 // 56-57, u.y takes offsets 60-63, u.z
\r
64 // takes offsets 64-69
\r
65 S v[2]; // rule 10: align = 8, array stride = 16, v[0]
\r
66 // takes offsets 72-87, v[1] takes
\r
70 // Specialization constant
\r
71 layout(constant_id = 100) const float16_t sf16 = 0.125hf;
\r
72 layout(constant_id = 101) const float sf = 0.25;
\r
73 layout(constant_id = 102) const double sd = 0.5lf;
\r
75 const float f16_to_f = float(sf16);
\r
76 const double f16_to_d = float(sf16);
\r
78 const float16_t f_to_f16 = float16_t(sf);
\r
79 const float16_t d_to_f16 = float16_t(sd);
\r
100 f16 = f16v.x + f16v.y;
\r
101 f16 = f16v.x - f16v.y;
\r
102 f16 = f16v.x * f16v.y;
\r
103 f16 = f16v.x / f16v.y;
\r
106 b = (f16v.x != f16);
\r
107 b = (f16v.y == f16);
\r
108 b = (f16v.x > f16);
\r
109 b = (f16v.y < f16);
\r
110 b = (f16v.x >= f16);
\r
111 b = (f16v.y <= f16);
\r
113 // Vector/matrix operations
\r
116 f16v = f16m * f16v;
\r
117 f16v = f16v * f16m;
\r
118 f16m = f16m * f16m;
\r
133 f16v = f16vec3(bv); // bool -> float16
\r
134 bv = bvec3(f16v); // float16 -> bool
\r
136 f16v = f16vec3(fv); // float -> float16
\r
137 fv = vec3(f16v); // float16 -> float
\r
139 f16v = f16vec3(dv); // double -> float16
\r
140 dv = dvec3(f16v); // float16 -> double
\r
142 f16v = f16vec3(iv); // int -> float16
\r
143 iv = ivec3(f16v); // float16 -> int
\r
145 f16v = f16vec3(uv); // uint -> float16
\r
146 uv = uvec3(f16v); // float16 -> uint
\r
148 f16v = f16vec3(i64v); // int64 -> float16
\r
149 i64v = i64vec3(f16v); // float16 -> int64
\r
151 f16v = f16vec3(u64v); // uint64 -> float16
\r
152 u64v = u64vec3(f16v); // float16 -> uint64
\r
155 void builtinAngleTrigFuncs()
\r
157 f16vec4 f16v1, f16v2;
\r
159 f16v2 = radians(f16v1);
\r
160 f16v2 = degrees(f16v1);
\r
161 f16v2 = sin(f16v1);
\r
162 f16v2 = cos(f16v1);
\r
163 f16v2 = tan(f16v1);
\r
164 f16v2 = asin(f16v1);
\r
165 f16v2 = acos(f16v1);
\r
166 f16v2 = atan(f16v1, f16v2);
\r
167 f16v2 = atan(f16v1);
\r
168 f16v2 = sinh(f16v1);
\r
169 f16v2 = cosh(f16v1);
\r
170 f16v2 = tanh(f16v1);
\r
171 f16v2 = asinh(f16v1);
\r
172 f16v2 = acosh(f16v1);
\r
173 f16v2 = atanh(f16v1);
\r
176 void builtinExpFuncs()
\r
178 f16vec2 f16v1, f16v2;
\r
180 f16v2 = pow(f16v1, f16v2);
\r
181 f16v2 = exp(f16v1);
\r
182 f16v2 = log(f16v1);
\r
183 f16v2 = exp2(f16v1);
\r
184 f16v2 = log2(f16v1);
\r
185 f16v2 = sqrt(f16v1);
\r
186 f16v2 = inversesqrt(f16v1);
\r
189 void builtinCommonFuncs()
\r
191 f16vec3 f16v1, f16v2, f16v3;
\r
197 f16v2 = abs(f16v1);
\r
198 f16v2 = sign(f16v1);
\r
199 f16v2 = floor(f16v1);
\r
200 f16v2 = trunc(f16v1);
\r
201 f16v2 = round(f16v1);
\r
202 f16v2 = roundEven(f16v1);
\r
203 f16v2 = ceil(f16v1);
\r
204 f16v2 = fract(f16v1);
\r
205 f16v2 = mod(f16v1, f16v2);
\r
206 f16v2 = mod(f16v1, f16);
\r
207 f16v3 = modf(f16v1, f16v2);
\r
208 f16v3 = min(f16v1, f16v2);
\r
209 f16v3 = min(f16v1, f16);
\r
210 f16v3 = max(f16v1, f16v2);
\r
211 f16v3 = max(f16v1, f16);
\r
212 f16v3 = clamp(f16v1, f16, f16v2.x);
\r
213 f16v3 = clamp(f16v1, f16v2, f16vec3(f16));
\r
214 f16v3 = mix(f16v1, f16v2, f16);
\r
215 f16v3 = mix(f16v1, f16v2, f16v3);
\r
216 f16v3 = mix(f16v1, f16v2, bv);
\r
217 f16v3 = step(f16v1, f16v2);
\r
218 f16v3 = step(f16, f16v3);
\r
219 f16v3 = smoothstep(f16v1, f16v2, f16v3);
\r
220 f16v3 = smoothstep(f16, f16v1.x, f16v2);
\r
223 f16v3 = fma(f16v1, f16v2, f16v3);
\r
224 f16v2 = frexp(f16v1, iv);
\r
225 f16v2 = ldexp(f16v1, iv);
\r
228 void builtinPackUnpackFuncs()
\r
233 u = packFloat2x16(f16v);
\r
234 f16v = unpackFloat2x16(u);
\r
237 void builtinGeometryFuncs()
\r
240 f16vec3 f16v1, f16v2, f16v3;
\r
242 f16 = length(f16v1);
\r
243 f16 = distance(f16v1, f16v2);
\r
244 f16 = dot(f16v1, f16v2);
\r
245 f16v3 = cross(f16v1, f16v2);
\r
246 f16v2 = normalize(f16v1);
\r
247 f16v3 = faceforward(f16v1, f16v2, f16v3);
\r
248 f16v3 = reflect(f16v1, f16v2);
\r
249 f16v3 = refract(f16v1, f16v2, f16);
\r
252 void builtinMatrixFuncs()
\r
254 f16mat2x3 f16m1, f16m2, f16m3;
\r
257 f16mat4 f16m6, f16m7;
\r
264 f16m3 = matrixCompMult(f16m1, f16m2);
\r
265 f16m1 = outerProduct(f16v1, f16v2);
\r
266 f16m4 = transpose(f16m1);
\r
267 f16 = determinant(f16m5);
\r
268 f16m6 = inverse(f16m7);
\r
271 void builtinVecRelFuncs()
\r
273 f16vec3 f16v1, f16v2;
\r
276 bv = lessThan(f16v1, f16v2);
\r
277 bv = lessThanEqual(f16v1, f16v2);
\r
278 bv = greaterThan(f16v1, f16v2);
\r
279 bv = greaterThanEqual(f16v1, f16v2);
\r
280 bv = equal(f16v1, f16v2);
\r
281 bv = notEqual(f16v1, f16v2);
\r
286 void builtinFragProcFuncs()
\r
291 f16v.x = dFdx(if16v.x);
\r
292 f16v.y = dFdy(if16v.y);
\r
293 f16v.xy = dFdxFine(if16v.xy);
\r
294 f16v.xy = dFdyFine(if16v.xy);
\r
295 f16v = dFdxCoarse(if16v);
\r
296 f16v = dFdyCoarse(if16v);
\r
298 f16v.x = fwidth(if16v.x);
\r
299 f16v.xy = fwidthFine(if16v.xy);
\r
300 f16v = fwidthCoarse(if16v);
\r
303 f16v.x = interpolateAtCentroid(if16v.x);
\r
304 f16v.xy = interpolateAtSample(if16v.xy, 1);
\r
305 f16v = interpolateAtOffset(if16v, f16vec2(0.5hf));
\r