2 #extension GL_KHR_memory_scope_semantics : enable
\r
3 #extension GL_NV_cooperative_matrix : enable
\r
4 #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
\r
5 #extension GL_EXT_buffer_reference : enable
\r
7 layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
10 layout(constant_id = 0) const int Y = 2;
\r
13 fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC;
\r
14 fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC2[3];
\r
16 int arr[mC.length()];
\r
17 int arr2[mC2[1].length()];
\r
19 layout(constant_id = 1) const float F = 3.0;
\r
21 const fcoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = fcoopmatNV<32, gl_ScopeSubgroup, Z, 8>(0.0);
\r
22 const fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> mD2 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(1);
\r
24 struct S { int a; int b; int c; };
\r
26 const S s = S(12, 23, 34);
\r
28 layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
\r
33 layout(set = 0, binding = 0) coherent buffer Block16 {
\r
34 float16_t y[1024*1024];
\r
40 fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> f16(fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> m) { return -m; }
\r
41 fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> f32(fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> m) { return -m; }
\r
43 layout(constant_id = 2) const int SC = 1;
\r
44 fcoopmatNV<16, gl_ScopeSubgroup, SC, SC> scm[SC][SC];
\r
46 // sized for fcoopmatNV<16, gl_ScopeSubgroup, 16, 16>
\r
47 shared uvec4 shmatrix[16*16*2/16];
\r
51 fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> m = fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)>(0.0);
\r
59 fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> m2 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(m);
\r
64 coopMatLoadNV(m, block.x, 16, 128, false);
\r
65 coopMatStoreNV(m, block.x, 16, 128, false);
\r
66 coopMatLoadNV(m2, block16.x, 16, 128, false);
\r
67 coopMatStoreNV(m2, block16.x, 16, 128, false);
\r
68 coopMatLoadNV(m, block16.b.x, 16, 128, false);
\r
69 coopMatStoreNV(m, block16.b.x, 16, 128, false);
\r
71 fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> A;
\r
72 fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> B;
\r
73 fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> C;
\r
74 fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> D;
\r
75 D = coopMatMulAddNV(A, B, C);
\r
79 fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> E;
\r
81 fcoopmatNV<16, gl_ScopeSubgroup, Z, Z> F = fcoopmatNV<16, gl_ScopeSubgroup, Z, Z>(0.0);
\r
83 fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
\r
88 md1 += (m += m)[1234];
\r
92 coopMatLoadNV(m, block.y, 16, 128, false);
\r
93 coopMatStoreNV(m, block.y, 16, 128, false);
\r
94 coopMatLoadNV(m2, block16.y, 16, 128, false);
\r
95 coopMatStoreNV(m2, block16.y, 16, 128, false);
\r
97 fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> p1;
\r
98 fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> p2;
\r
103 p1 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(0.0);
\r
104 p2 = fcoopmatNV<32, gl_ScopeSubgroup, 8, 8>(0.0);
\r
108 p1 *= float16_t(2.0);
\r
111 fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> ms;
\r
112 coopMatLoadNV(ms, shmatrix, 1, 2, false);
\r
113 coopMatStoreNV(ms, shmatrix, 1, 2, false);
\r