2 #extension GL_KHR_memory_scope_semantics : enable
\r
3 #extension GL_NV_cooperative_matrix : enable
\r
4 #extension GL_NV_integer_cooperative_matrix : enable
\r
5 #extension GL_EXT_shader_explicit_arithmetic_types : enable
\r
6 #extension GL_EXT_buffer_reference : enable
\r
// Compute workgroup size: 64 x 1 x 1 invocations.
8 layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
// Specialization constant (constant_id 0) with a default value of 2.
11 layout(constant_id = 0) const int Y = 2;
\r
// Global 8-bit cooperative matrices at subgroup scope with dimension
// parameters Z and 8: signed (icoopmatNV) and unsigned (ucoopmatNV), each
// declared once as a single matrix and once as a 3-element array.
// NOTE(review): Z is not declared in the lines visible here — confirm its definition.
14 icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC;
\r
15 icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC2[3];
\r
16 ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC;
\r
17 ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC2[3];
\r
// Arrays whose sizes come from length() of the matrices above. All four are
// declared int, including the two sized from the unsigned matrices.
19 int iarr[miC.length()];
\r
20 int iarr2[miC2[1].length()];
\r
21 int uarr[muC.length()];
\r
22 int uarr2[muC2[1].length()];
\r
// Constant cooperative matrices, each built by passing the scalar 1 to the
// constructor of its own type: mD is 32-bit signed with dimension parameters
// Z and 8; mD2 is 8-bit unsigned, 8 x 8.
24 const icoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = icoopmatNV<32, gl_ScopeSubgroup, Z, 8>(1);
\r
25 const ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> mD2 = ucoopmatNV<8, gl_ScopeSubgroup, 8, 8>(1);
\r
// Plain struct of three ints.
27 struct S { int a; int b; int c; };
\r
// Constant instance of S with a = 12, b = 23, c = 34.
29 const S s = S(12, 23, 34);
\r
31 layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
\r
36 layout(set = 0, binding = 0) coherent buffer Block16 {
\r
37 int8_t y[1024*1024];
\r
// Returns the unary negation of m, an 8-bit signed 8 x 8 subgroup-scope
// cooperative matrix; the result has the same type as the argument.
43 icoopmatNV<8, gl_ScopeSubgroup, 8, 8> ineg(icoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return -m; }
\r
// Returns m multiplied by the scalar uint8_t(2); m is an 8-bit unsigned
// 8 x 8 subgroup-scope cooperative matrix and the result has the same type.
44 ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> umul(ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return m * uint8_t(2); }
\r
// Specialization constant (constant_id 2) with a default value of 1.
46 layout(constant_id = 2) const int SC = 1;
\r
// 2-D array of 32-bit unsigned cooperative matrices. SC supplies both matrix
// dimension parameters and both array sizes.
47 ucoopmatNV<32, gl_ScopeSubgroup, SC, SC> scm[SC][SC];
\r
49 // sized for icoopmatNV<8, gl_ScopeSubgroup, 16, 16>
\r
// Shared array of uvec4; the size expression 16*16*2/16 evaluates to 32 elements.
// NOTE(review): the *2 factor looks like a 2-byte component size, whereas the
// comment above names an 8-bit matrix — confirm which sizing is intended.
50 shared uvec4 shmatrix[16*16*2/16];
\r
// mu / mi: 8-bit unsigned and signed cooperative matrices. The last dimension
// parameter is written as the constant expression (2>1?8:4), which evaluates
// to 8. Both are constructed from the scalar 2.
54 ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mu = ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
\r
55 icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mi = icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
\r
// Multiply mi by the scalar int8_t(2) and assign the result back to mi.
60 mi = mi * int8_t(2);
\r
// Construct 16-bit and 32-bit floating-point cooperative matrices (dimension
// parameters 16 and 8) from the unsigned matrix mu (_0) and the signed matrix mi (_1).
62 fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_0 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mu);
\r
63 fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_0 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mu);
\r
64 fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_1 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mi);
\r
65 fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_1 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mi);
\r
// Load into and store from mi / mu through the members block.x, block8.x and
// block8.b.x. Every call passes 16, 128, false after the buffer argument
// (in GL_NV_cooperative_matrix these are element offset, stride and colMajor).
// NOTE(review): block and block8 are not declared in the lines visible here —
// confirm the element types of their x members against the full shader.
70 coopMatLoadNV(mi, block.x, 16, 128, false);
\r
71 coopMatStoreNV(mi, block.x, 16, 128, false);
\r
72 coopMatLoadNV(mu, block8.x, 16, 128, false);
\r
73 coopMatStoreNV(mu, block8.x, 16, 128, false);
\r
74 coopMatLoadNV(mi, block8.b.x, 16, 128, false);
\r
75 coopMatStoreNV(mi, block8.b.x, 16, 128, false);
\r
// Operands for the multiply-add below, all 8-bit unsigned at subgroup scope:
// A is 16 x 8, B is 8 x 8, C and D are 16 x 8.
77 ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> A;
\r
78 ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> B;
\r
79 ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> C;
\r
80 ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> D;
\r
// D receives the result of coopMatMulAddNV(A, B, C)
// (A * B + C per GL_NV_cooperative_matrix).
81 D = coopMatMulAddNV(A, B, C);
\r
// Array of five 8-bit signed cooperative matrices; write int8_t(1) to
// component 0 of a[3].
86 icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
\r
87 a[3][0] = int8_t(1);
\r
// Index the result of the compound assignment (mi += mi) at 1234 and add that
// component to md1.
// NOTE(review): md1 is not declared in the lines visible here.
91 md1 += (mi += mi)[1234];
\r
// Assign component 0 of the signed matrix miC2[2] to component 0 of the
// unsigned matrix muC2[1].
94 muC2[1][0] = (miC2[2][0]);
\r
// Same load/store pattern as used with the x members, this time through
// block.y and block8.y (arguments 16, 128, false).
// NOTE(review): block and block8 are not declared in the lines visible here.
96 coopMatLoadNV(mi, block.y, 16, 128, false);
\r
97 coopMatStoreNV(mi, block.y, 16, 128, false);
\r
98 coopMatLoadNV(mu, block8.y, 16, 128, false);
\r
99 coopMatStoreNV(mu, block8.y, 16, 128, false);
\r
// Signed and unsigned 8-bit 8 x 8 matrices; their uses are not visible in this excerpt.
101 icoopmatNV<8, gl_ScopeSubgroup, 8, 8> p1;
\r
102 ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> p2;
\r
// Load ms from, and store it back to, the shared uvec4 array shmatrix,
// passing 1, 2, false after the buffer argument.
113 icoopmatNV<8, gl_ScopeSubgroup, 16, 8> ms;
\r
114 coopMatLoadNV(ms, shmatrix, 1, 2, false);
\r
115 coopMatStoreNV(ms, shmatrix, 1, 2, false);
\r