#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_matrix : enable
#extension GL_NV_integer_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable

// Compute shader exercising signed (icoopmatNV) and unsigned (ucoopmatNV)
// integer cooperative matrices: declarations, constructors, arithmetic,
// conversions to fcoopmatNV, element access, load/store and multiply-add.
// Most results are never written anywhere observable, so this reads as a
// feature-coverage shader rather than a useful kernel.
// NOTE(review): presumably a compiler test input - confirm before "fixing"
// any of the oddities flagged below.

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Z is derived from specialization constant Y (constant_id = 0), so every
// matrix type below that uses Z as a dimension has a specialization-dependent
// size. With the default Y = 2, Z is 16.
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;

// Global 8-bit cooperative matrices (single and arrayed) with Z rows.
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC;
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC2[3];
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC;
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC2[3];

// Arrays whose sizes come from cooperative-matrix length(), used here as a
// constant expression.
int iarr[miC.length()];
int iarr2[miC2[1].length()];
int uarr[muC.length()];
int uarr2[muC2[1].length()];

// Constant cooperative matrices built with the scalar constructor.
const icoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = icoopmatNV<32, gl_ScopeSubgroup, Z, 8>(1);
const ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> mD2 = ucoopmatNV<8, gl_ScopeSubgroup, 8, 8>(1);

// Plain struct and constant; not referenced by main() in this file.
struct S { int a; int b; int c; };

const S s = S(12, 23, 34);

// 32-bit unsigned storage: a fixed-size array followed by a runtime-sized one.
// Declared with buffer_reference and also given the instance name `block`.
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    uint y[1024*1024];
    uint x[];
} block;

// 8-bit signed storage with the same y/x layout, plus a member `b` of the
// buffer_reference type Block.
// NOTE(review): uses the same set/binding as `block` above - confirm intended.
layout(set = 0, binding = 0) coherent buffer Block16 {
    int8_t y[1024*1024];
    int8_t x[];

    Block b;
} block8;

// Returns the element-wise negation of a signed 8x8 matrix.
icoopmatNV<8, gl_ScopeSubgroup, 8, 8> ineg(icoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return -m; }

// Returns an unsigned 8x8 matrix scaled by the scalar uint8_t(2).
ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> umul(ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return m * uint8_t(2); }

// Matrix type and array whose dimensions are all the specialization constant
// SC (constant_id = 2, default 1).
layout(constant_id = 2) const int SC = 1;
ucoopmatNV<32, gl_ScopeSubgroup, SC, SC> scm[SC][SC];

// sized for icoopmatNV<8, gl_ScopeSubgroup, 16, 16>
// NOTE(review): 16*16*2/16 is 32 uvec4 (512 bytes), while a 16x16 matrix of
// 8-bit elements is 256 bytes - the "*2" looks like a leftover; confirm.
shared uvec4 shmatrix[16*16*2/16];

void main()
{
    // Column dimension is the constant expression (2>1?8:4), which is 8.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mu = ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mi = icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);

    // Matrix add/subtract, unary negate and matrix-times-scalar.
    mu = mu + mu;
    mu = mu - mu;
    mi = -mi;
    mi = mi * int8_t(2);

    // Conversions from both integer matrices to 16- and 32-bit float matrices.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_0 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_0 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_1 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mi);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_1 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mi);

    // Element read from the unsigned matrix, element write to the signed one.
    uint8_t x = mu[1];
    mi[0] = int8_t(x);

    // Load/store through the runtime-sized arrays: 32-bit storage (block.x),
    // 8-bit storage (block8.x) and 32-bit storage reached via the buffer
    // reference member (block8.b.x). Per GL_NV_cooperative_matrix the trailing
    // arguments are (element, stride, colMajor).
    coopMatLoadNV(mi, block.x, 16, 128, false);
    coopMatStoreNV(mi, block.x, 16, 128, false);
    coopMatLoadNV(mu, block8.x, 16, 128, false);
    coopMatStoreNV(mu, block8.x, 16, 128, false);
    coopMatLoadNV(mi, block8.b.x, 16, 128, false);
    coopMatStoreNV(mi, block8.b.x, 16, 128, false);

    // D = A * B + C with A 16x8, B 8x8, C and D 16x8.
    // NOTE(review): A, B and C are never initialized before this call.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> A;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> B;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> C;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> D;
    D = coopMatMulAddNV(A, B, C);

    int l = D.length();

    // Array of matrices, then an element write into one array entry.
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
    a[3][0] = int8_t(1);

    // Element read from the constant matrix, then indexing of the result of a
    // compound assignment.
    // NOTE(review): 1234 is a constant index with no visible bound check;
    // presumably deliberate - confirm.
    int md1 = mD[1];
    md1 += (mi += mi)[1234];

    // Whole-matrix copy between global array entries, then an element copy
    // from a signed global matrix into an unsigned one.
    muC2[0] = muC2[1];
    muC2[1][0] = (miC2[2][0]);

    // Same load/store pattern as above, against the fixed-size arrays.
    coopMatLoadNV(mi, block.y, 16, 128, false);
    coopMatStoreNV(mi, block.y, 16, 128, false);
    coopMatLoadNV(mu, block8.y, 16, 128, false);
    coopMatStoreNV(mu, block8.y, 16, 128, false);

    // Matrices as function parameters/returns, then compound divide and
    // compound multiply-by-scalar.
    // NOTE(review): p1 and p2 are uninitialized when first passed in.
    icoopmatNV<8, gl_ScopeSubgroup, 8, 8> p1;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> p2;
    p1 = ineg(p1);
    p2 = umul(p2);
    p1 /= p1;
    p2 /= p2;
    p1 *= int8_t(2);
    p2 *= uint8_t(4);

    // Load/store against the shared uvec4 array, with element 1 and stride 2.
    icoopmatNV<8, gl_ScopeSubgroup, 16, 8> ms;
    coopMatLoadNV(ms, shmatrix, 1, 2, false);
    coopMatStoreNV(ms, shmatrix, 1, 2, false);
}