size = l / 4;
}
+ /* See: https://github.com/GPUOpen-Tools/radeon_gpu_profiler/issues/65 and
+ * https://github.com/llvm/llvm-project/issues/38652
+ */
+ if (chip == GFX9 && (binary[pos] & 0xfc024000) == 0xc0024000) {
+ /* SMEM with IMM=1 and SOE=1: LLVM ignores SOFFSET */
+ size_t len = strlen(outline);
+ snprintf(outline + len, outline_size - len, ", s%u", binary[pos + 1] >> 25);
+ } else if (chip >= GFX10 && (binary[pos] & 0xfc000000) == 0xf4000000 &&
+ (binary[pos + 1] & 0xfe000000) != 0xfa000000) {
+ /* SMEM non-NULL SOFFSET: LLVM ignores OFFSET */
+ uint32_t offset = binary[pos + 1] & 0x1fffff;
+ if (offset) {
+ size_t len = strlen(outline);
+ snprintf(outline + len, outline_size - len, ", 0x%x", offset);
+ }
+ }
+
return std::make_pair(invalid, size);
}
finish_assembler_test();
}
END_TEST
+
+BEGIN_TEST(assembler.smem_offset)
+ for (unsigned i = GFX9; i <= GFX10; i++) {
+ if (!setup_cs(NULL, (chip_class)i))
+ continue;
+
+ Definition dst(PhysReg(7), s1);
+ Operand sbase(PhysReg(6), s2);
+ Operand offset(PhysReg(5), s1);
+
+ //~gfx9>> s_load_dword s7, s[6:7], s5 ; c00001c3 00000005
+ //~gfx10>> s_load_dword s7, s[6:7], s5 ; f40001c3 0a000000
+ bld.smem(aco_opcode::s_load_dword, dst, sbase, offset);
+ //~gfx9! s_load_dword s7, s[6:7], 0x42 ; c00201c3 00000042
+ //~gfx10! s_load_dword s7, s[6:7], 0x42 ; f40001c3 fa000042
+ bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42));
+ if (i >= GFX9) {
+ //~gfx9! s_load_dword s7, s[6:7], 0x42, s5 ; c00241c3 0a000042
+ //~gfx10! s_load_dword s7, s[6:7], s5, 0x42 ; f40001c3 0a000042
+ bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42), offset);
+ }
+
+ finish_assembler_test();
+ }
+END_TEST