#version 450 core
// Each preprocessor directive must sit on its own line; they are
// line-terminated and cannot share a physical line.
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#extension GL_EXT_buffer_reference : enable

// Compute shader that touches a broad range of GL_NV_cooperative_matrix
// constructs: declarations at global and function scope, constant and
// specialization-constant dimensions, arithmetic, element indexing,
// conversion between 16- and 32-bit component matrices, load/store against
// storage buffers / a buffer reference / shared memory, multiply-add, and
// passing matrices through functions.
// NOTE(review): the statements do no meaningful computation; this looks like
// a compiler feature-coverage fixture, so the exact statement forms and their
// order are kept as they were — confirm before simplifying anything.

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Matrix dimensions built from a plain constant and a specialization
// constant. Z evaluates to 16 with the default Y = 2, but Y can be overridden
// at specialization time.
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;

// Global cooperative matrices (16-bit components, subgroup scope, Z x 8),
// one scalar and one array of three.
fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC;
fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC2[3];

// Arrays whose sizes come from cooperative-matrix .length().
int arr[mC.length()];
int arr2[mC2[1].length()];

// Float specialization constant. Note that main() declares a local also
// named F, which hides this one inside main().
layout(constant_id = 1) const float F = 3.0;

// Constant matrices built with the single-scalar constructor (float and int
// arguments respectively).
const fcoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = fcoopmatNV<32, gl_ScopeSubgroup, Z, 8>(0.0);
const fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> mD2 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(1);

// Plain struct and a constant instance of it; neither is referenced by the
// rest of this shader.
struct S { int a; int b; int c; };

const S s = S(12, 23, 34);

// 32-bit float storage. Declared with buffer_reference, so `Block` is also
// usable as a reference type (see Block16.b below).
// NOTE(review): this block and Block16 both declare set = 0, binding = 0 —
// kept as in the original; confirm this is intended.
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    float y[1024*1024];
    float x[];
} block;

// 16-bit float storage plus a buffer reference `b` to a Block.
layout(set = 0, binding = 0) coherent buffer Block16 {
    float16_t y[1024*1024];
    float16_t x[];

    Block b;
} block16;

// Return the negation of an 8x8 cooperative matrix with 16-bit components.
fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> f16(fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> m) { return -m; }
// Return the negation of an 8x8 cooperative matrix with 32-bit components.
fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> f32(fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> m) { return -m; }

// Integer specialization constant used for both the matrix dimensions and
// the array dimensions of `scm`.
layout(constant_id = 2) const int SC = 1;
fcoopmatNV<16, gl_ScopeSubgroup, SC, SC> scm[SC][SC];

// sized for fcoopmatNV<16, gl_ScopeSubgroup, 16, 16>
// (16*16 components * 2 bytes each / 16 bytes per uvec4 = 32 uvec4 elements)
shared uvec4 shmatrix[16*16*2/16];

void main()
{
    // The column count is the constant expression (2>1?8:4), i.e. 8.
    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> m = fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)>(0.0);

    // Component-wise arithmetic and scalar scaling on either side.
    m = m + m;
    m = m - m;
    m = -m;
    m = 2.0*m;
    m = m*2.0;

    // Convert the 32-bit component matrix to a 16-bit component matrix of
    // the same shape.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> m2 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(m);

    // Read and write individual components by index.
    float x = m[1];
    m[0] = x;

    // Load/store through the runtime-sized arrays: float buffer, float16_t
    // buffer, and the float buffer reached via the buffer reference
    // block16.b. Trailing arguments are (element offset, stride, colMajor)
    // as defined by GL_NV_cooperative_matrix.
    coopMatLoadNV(m, block.x, 16, 128, false);
    coopMatStoreNV(m, block.x, 16, 128, false);
    coopMatLoadNV(m2, block16.x, 16, 128, false);
    coopMatStoreNV(m2, block16.x, 16, 128, false);
    coopMatLoadNV(m, block16.b.x, 16, 128, false);
    coopMatStoreNV(m, block16.b.x, 16, 128, false);

    // Multiply-add: A (16x8, 16-bit) * B (8x8, 16-bit) + C (16x8, 32-bit)
    // into D (16x8, 32-bit). A, B and C are left uninitialized here.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> A;
    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> B;
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> C;
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> D;
    D = coopMatMulAddNV(A, B, C);

    int l = D.length();

    // Declared but not used afterwards.
    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> E;

    // Local F hides the global specialization constant F for the remainder
    // of main(). Both dimensions depend on the specialization constant Y
    // via Z.
    fcoopmatNV<16, gl_ScopeSubgroup, Z, Z> F = fcoopmatNV<16, gl_ScopeSubgroup, Z, Z>(0.0);

    // Array of cooperative matrices; write one component of one element.
    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
    a[3][0] = 1.0;

    // Index into a constant global matrix.
    float md1 = mD[1];

    // Index the result of a compound assignment.
    // NOTE(review): 1234 is a hard-coded literal index; whether it is within
    // the per-invocation length is not established here — presumably
    // deliberate, confirm before changing.
    md1 += (m += m)[1234];

    // Whole-matrix assignment between elements of the global array.
    mC2[1] = mC2[2];

    // Load/store through the fixed-size arrays (y) instead of the
    // runtime-sized ones (x).
    coopMatLoadNV(m, block.y, 16, 128, false);
    coopMatStoreNV(m, block.y, 16, 128, false);
    coopMatLoadNV(m2, block16.y, 16, 128, false);
    coopMatStoreNV(m2, block16.y, 16, 128, false);

    // Pass matrices through user functions as parameter and return value.
    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> p1;
    fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> p2;

    p1 = f16(p1);
    p2 = f32(p2);

    p1 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(0.0);
    p2 = fcoopmatNV<32, gl_ScopeSubgroup, 8, 8>(0.0);

    // Compound division; p1 was set to all zeros just above, so this divides
    // zero by zero component-wise.
    p1 /= p1;

    // Compound scalar multiply with an explicit float16_t scalar and with a
    // float literal.
    p1 *= float16_t(2.0);
    p2 *= 4.0;

    // Load/store against the shared-memory uvec4 array.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> ms;
    coopMatLoadNV(ms, shmatrix, 1, 2, false);
    coopMatStoreNV(ms, shmatrix, 1, 2, false);
}