2 # Copyright (c) 2018 Valve Corporation
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the "Software"),
6 # to deal in the Software without restriction, including without limitation
7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 # and/or sell copies of the Software, and to permit persons to whom the
9 # Software is furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice (including the next
12 # paragraph) shall be included in all copies or substantial portions of the
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 # Class that represents all the information we have about the opcode
25 # NOTE: this must be kept in sync with aco_op_info
30 class InstrClass(Enum):
36 ValuTranscendental32 = 5
40 ValuDoubleTranscendental = 9
def get_builder_fields(self):
    """Return the format-specific extra fields for this instruction format.

    Each entry is a tuple ``(type, name, default[, dest])``:

    * ``type`` and ``name`` are emitted as a ``type name`` declaration by
      get_builder_field_decls().
    * ``default`` is appended to that declaration as ``=default``; ``None``
      means the declaration carries no default value.
    * ``dest``, when present, is the name reported by
      get_builder_field_dests() in place of ``name``.

    NOTE(review): only the formats tested below are covered by the visible
    part of this chain; how the remaining formats are handled is not
    visible from here -- confirm against the full file.
    """
    if self == Format.SOPK:
        return [('uint16_t', 'imm', None)]
    elif self == Format.SOPP:
        return [('uint32_t', 'block', '-1'),
                ('uint32_t', 'imm', '0')]
    elif self == Format.SMEM:
        return [('memory_sync_info', 'sync', 'memory_sync_info()'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'nv', 'false')]
    elif self == Format.DS:
        return [('uint16_t', 'offset0', '0'),
                ('uint8_t', 'offset1', '0'),
                ('bool', 'gds', 'false')]
    elif self == Format.LDSDIR:
        return [('uint8_t', 'attr', 0),
                ('uint8_t', 'attr_chan', 0),
                ('memory_sync_info', 'sync', 'memory_sync_info()'),
                ('uint8_t', 'wait_vdst', 15)]
    elif self == Format.MTBUF:
        return [('unsigned', 'dfmt', None),
                ('unsigned', 'nfmt', None),
                ('unsigned', 'offset', None),
                ('bool', 'offen', None),
                ('bool', 'idxen', 'false'),
                ('bool', 'disable_wqm', 'false'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'tfe', 'false')]
    elif self == Format.MUBUF:
        return [('unsigned', 'offset', None),
                ('bool', 'offen', None),
                ('bool', 'swizzled', 'false'),
                ('bool', 'idxen', 'false'),
                ('bool', 'addr64', 'false'),
                ('bool', 'disable_wqm', 'false'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'tfe', 'false'),
                ('bool', 'lds', 'false')]
    elif self == Format.MIMG:
        # A second, unreachable ``return`` (a copy of the VINTRP field list)
        # used to follow this one; it has been removed as dead code.
        return [('unsigned', 'dmask', '0xF'),
                ('bool', 'da', 'false'),
                ('bool', 'unrm', 'false'),
                ('bool', 'disable_wqm', 'false'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'tfe', 'false'),
                ('bool', 'lwe', 'false'),
                ('bool', 'r128', 'false'),
                ('bool', 'a16', 'false'),
                ('bool', 'd16', 'false')]
    elif self == Format.EXP:
        return [('unsigned', 'enabled_mask', None),
                ('unsigned', 'dest', None),
                ('bool', 'compr', 'false', 'compressed'),
                ('bool', 'done', 'false'),
                ('bool', 'vm', 'false', 'valid_mask')]
    elif self == Format.PSEUDO_BRANCH:
        return [('uint32_t', 'target0', '0', 'target[0]'),
                ('uint32_t', 'target1', '0', 'target[1]')]
    elif self == Format.PSEUDO_REDUCTION:
        return [('ReduceOp', 'op', None, 'reduce_op'),
                ('unsigned', 'cluster_size', '0')]
    elif self == Format.PSEUDO_BARRIER:
        return [('memory_sync_info', 'sync', None),
                ('sync_scope', 'exec_scope', 'scope_invocation')]
    elif self == Format.VINTRP:
        return [('unsigned', 'attribute', None),
                ('unsigned', 'component', None)]
    elif self == Format.DPP16:
        return [('uint16_t', 'dpp_ctrl', None),
                ('uint8_t', 'row_mask', '0xF'),
                ('uint8_t', 'bank_mask', '0xF'),
                ('bool', 'bound_ctrl', 'true')]
    elif self == Format.VOP3P:
        return [('uint8_t', 'opsel_lo', None),
                ('uint8_t', 'opsel_hi', None)]
    elif self == Format.VINTERP_INREG:
        return [('unsigned', 'wait_exp', 7),
                ('uint8_t', 'opsel', 0)]
    elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
        # The three flat-style formats share one field list.
        return [('int16_t', 'offset', 0),
                ('memory_sync_info', 'sync', 'memory_sync_info()'),
                ('bool', 'glc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'lds', 'false'),
                ('bool', 'nv', 'false')]
def get_builder_field_names(self):
    """Return the name (second tuple element) of every builder field, in order."""
    names = []
    for field in self.get_builder_fields():
        names.append(field[1])
    return names
def get_builder_field_dests(self):
    """Return the destination name of every builder field, in order.

    A field tuple with at least four elements supplies its destination as
    the fourth element; shorter tuples fall back to the field name (second
    element).
    """
    dests = []
    for field in self.get_builder_fields():
        has_explicit_dest = len(field) >= 4
        dests.append(field[3] if has_explicit_dest else field[1])
    return dests
def get_builder_field_decls(self):
    """Return one declaration string per field from get_builder_fields().

    A field with a default yields ``"<type> <name>=<default>"``; a field
    whose default is ``None`` yields ``"<type> <name>"``.
    """
    decls = []
    for field in self.get_builder_fields():
        ctype, name, default = field[0], field[1], field[2]
        # Identity test on purpose: None is the "no default" marker, and
        # falsy defaults such as the integer 0 must still be emitted.
        if default is not None:
            decls.append('%s %s=%s' % (ctype, name, default))
        else:
            decls.append('%s %s' % (ctype, name))
    return decls
189 def get_builder_initialization(self, num_operands):
191 if self == Format.SDWA:
192 for i in range(min(num_operands, 2)):
193 res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
194 res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
198 class Opcode(object):
199 """Class that represents all the information we have about the opcode
200 NOTE: this must be kept in sync with aco_op_info
202 def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, opcode_gfx11, format, input_mod, output_mod, is_atomic, cls):
203 assert isinstance(name, str)
204 assert isinstance(opcode_gfx7, int)
205 assert isinstance(opcode_gfx9, int)
206 assert isinstance(opcode_gfx10, int)
207 assert isinstance(opcode_gfx11, int)
208 assert isinstance(format, Format)
209 assert isinstance(input_mod, bool)
210 assert isinstance(output_mod, bool)
213 self.opcode_gfx7 = opcode_gfx7
214 self.opcode_gfx9 = opcode_gfx9
215 self.opcode_gfx10 = opcode_gfx10
216 self.opcode_gfx11 = opcode_gfx11
217 self.input_mod = "1" if input_mod else "0"
218 self.output_mod = "1" if output_mod else "0"
219 self.is_atomic = "1" if is_atomic else "0"
223 parts = name.replace('_e64', '').rsplit('_', 2)
226 op_dtype_sizes = {'{}{}'.format(prefix, size) : size for prefix in 'biuf' for size in [64, 32, 24, 16]}
227 # inline constants are 32-bit for 16-bit integer/typeless instructions: https://reviews.llvm.org/D81841
228 op_dtype_sizes['b16'] = 32
229 op_dtype_sizes['i16'] = 32
230 op_dtype_sizes['u16'] = 32
232 # If we can't tell the operand size, default to 32.
233 self.operand_size = op_dtype_sizes.get(op_dtype, 32)
235 # exceptions for operands:
237 self.operand_size = 0
239 self.operand_size = 32
240 elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
241 self.operand_size = 0
242 elif self.operand_size == 24:
243 self.operand_size = 32
244 elif op_dtype == 'u8' or op_dtype == 'i8':
245 self.operand_size = 32
246 elif name in ['v_cvt_f32_ubyte0', 'v_cvt_f32_ubyte1',
247 'v_cvt_f32_ubyte2', 'v_cvt_f32_ubyte3']:
248 self.operand_size = 32
250 # global dictionary of opcodes
def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, opcode_gfx11 = -1, format = Format.PSEUDO, cls = InstrClass.Other, input_mod = False, output_mod = False, is_atomic = False):
    """Build an Opcode from the arguments and store it in the global ``opcodes`` dict.

    The entry is keyed by ``name``; the function asserts that ``name`` has
    not been registered before.
    """
    assert name not in opcodes
    # Opcode.__init__ takes input_mod/output_mod/is_atomic before cls, so the
    # argument order here differs from this function's own parameter order.
    info = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, opcode_gfx11,
                  format, input_mod, output_mod, is_atomic, cls)
    opcodes[name] = info
257 def default_class(opcodes, cls):
259 if isinstance(op[-1], InstrClass):
264 opcode("exp", 0, 0, 0, 0, format = Format.EXP, cls = InstrClass.Export)
265 opcode("p_parallelcopy")
269 opcode("p_linear_phi")
270 opcode("p_as_uniform")
271 opcode("p_unit_test")
273 opcode("p_create_vector")
274 opcode("p_extract_vector")
275 opcode("p_split_vector")
277 # start/end the parts where we can use exec based instructions
279 opcode("p_logical_start")
280 opcode("p_logical_end")
282 # e.g. subgroupMin() in SPIR-V
283 opcode("p_reduce", format=Format.PSEUDO_REDUCTION)
284 # e.g. subgroupInclusiveMin()
285 opcode("p_inclusive_scan", format=Format.PSEUDO_REDUCTION)
286 # e.g. subgroupExclusiveMin()
287 opcode("p_exclusive_scan", format=Format.PSEUDO_REDUCTION)
289 opcode("p_branch", format=Format.PSEUDO_BRANCH)
290 opcode("p_cbranch", format=Format.PSEUDO_BRANCH)
291 opcode("p_cbranch_z", format=Format.PSEUDO_BRANCH)
292 opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH)
294 opcode("p_barrier", format=Format.PSEUDO_BARRIER)
299 # Start/end linear vgprs. p_start_linear_vgpr can take an operand to copy from, into the linear vgpr
300 opcode("p_start_linear_vgpr")
301 opcode("p_end_linear_vgpr")
304 opcode("p_discard_if")
305 opcode("p_demote_to_helper")
306 opcode("p_is_helper")
307 opcode("p_exit_early_if")
309 # simulates proper bpermute behavior on GFX6
310 # definitions: result VGPR, temp EXEC, clobbered VCC
311 # operands: index, input data
312 opcode("p_bpermute_gfx6")
314 # simulates proper bpermute behavior on GFX10
315 # definitions: result VGPR, temp EXEC, clobbered SCC
316 # operands: index * 4, input data, same half (bool)
317 opcode("p_bpermute_gfx10w64")
319 # simulates proper bpermute behavior on GFX11
320 # definitions: result VGPR, temp EXEC, clobbered SCC
321 # operands: linear VGPR, index * 4, input data, same half (bool)
322 opcode("p_bpermute_gfx11w64")
324 # creates a lane mask where only the first active lane is selected
327 opcode("p_constaddr")
328 opcode("p_resume_shader_address")
330 # These don't have to be pseudo-ops, but it makes optimization easier to only
331 # have to consider two instructions.
332 # (src0 >> (index * bits)) & ((1 << bits) - 1) with optional sign extension
333 opcode("p_extract") # src1=index, src2=bits, src3=signext
334 # (src0 & ((1 << bits) - 1)) << (index * bits)
335 opcode("p_insert") # src1=index, src2=bits
337 opcode("p_init_scratch")
339 # jumps to a shader epilog
340 opcode("p_jump_to_epilog")
342 # loads and interpolates a fragment shader input with a correct exec mask
343 #dst0=result, src0=linear_vgpr, src1=attribute, src2=component, src3=coord1, src4=coord2, src5=m0
344 #dst0=result, src0=linear_vgpr, src1=attribute, src2=component, src3=dpp_ctrl, src4=m0
345 opcode("p_interp_gfx11")
347 # performs dual source MRTs swizzling and emits exports on GFX11
348 opcode("p_dual_src_export_gfx11")
350 # SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
352 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
353 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32"),
354 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32"),
355 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32"),
356 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32"),
357 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32"),
358 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32"),
359 (0x06, 0x06, 0x06, 0x06, 0x06, 0x12, "s_min_i32"),
360 (0x07, 0x07, 0x07, 0x07, 0x07, 0x13, "s_min_u32"),
361 (0x08, 0x08, 0x08, 0x08, 0x08, 0x14, "s_max_i32"),
362 (0x09, 0x09, 0x09, 0x09, 0x09, 0x15, "s_max_u32"),
363 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x30, "s_cselect_b32"),
364 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x31, "s_cselect_b64"),
365 (0x0e, 0x0e, 0x0c, 0x0c, 0x0e, 0x16, "s_and_b32"),
366 (0x0f, 0x0f, 0x0d, 0x0d, 0x0f, 0x17, "s_and_b64"),
367 (0x10, 0x10, 0x0e, 0x0e, 0x10, 0x18, "s_or_b32"),
368 (0x11, 0x11, 0x0f, 0x0f, 0x11, 0x19, "s_or_b64"),
369 (0x12, 0x12, 0x10, 0x10, 0x12, 0x1a, "s_xor_b32"),
370 (0x13, 0x13, 0x11, 0x11, 0x13, 0x1b, "s_xor_b64"),
371 (0x14, 0x14, 0x12, 0x12, 0x14, 0x22, "s_andn2_b32"), #s_and_not1_b32 in GFX11
372 (0x15, 0x15, 0x13, 0x13, 0x15, 0x23, "s_andn2_b64"), #s_and_not1_b64 in GFX11
373 (0x16, 0x16, 0x14, 0x14, 0x16, 0x24, "s_orn2_b32"), #s_or_not1_b32 in GFX11
374 (0x17, 0x17, 0x15, 0x15, 0x17, 0x25, "s_orn2_b64"), #s_or_not1_b64 in GFX11
375 (0x18, 0x18, 0x16, 0x16, 0x18, 0x1c, "s_nand_b32"),
376 (0x19, 0x19, 0x17, 0x17, 0x19, 0x1d, "s_nand_b64"),
377 (0x1a, 0x1a, 0x18, 0x18, 0x1a, 0x1e, "s_nor_b32"),
378 (0x1b, 0x1b, 0x19, 0x19, 0x1b, 0x1f, "s_nor_b64"),
379 (0x1c, 0x1c, 0x1a, 0x1a, 0x1c, 0x20, "s_xnor_b32"),
380 (0x1d, 0x1d, 0x1b, 0x1b, 0x1d, 0x21, "s_xnor_b64"),
381 (0x1e, 0x1e, 0x1c, 0x1c, 0x1e, 0x08, "s_lshl_b32"),
382 (0x1f, 0x1f, 0x1d, 0x1d, 0x1f, 0x09, "s_lshl_b64"),
383 (0x20, 0x20, 0x1e, 0x1e, 0x20, 0x0a, "s_lshr_b32"),
384 (0x21, 0x21, 0x1f, 0x1f, 0x21, 0x0b, "s_lshr_b64"),
385 (0x22, 0x22, 0x20, 0x20, 0x22, 0x0c, "s_ashr_i32"),
386 (0x23, 0x23, 0x21, 0x21, 0x23, 0x0d, "s_ashr_i64"),
387 (0x24, 0x24, 0x22, 0x22, 0x24, 0x2a, "s_bfm_b32"),
388 (0x25, 0x25, 0x23, 0x23, 0x25, 0x2b, "s_bfm_b64"),
389 (0x26, 0x26, 0x24, 0x24, 0x26, 0x2c, "s_mul_i32"),
390 (0x27, 0x27, 0x25, 0x25, 0x27, 0x26, "s_bfe_u32"),
391 (0x28, 0x28, 0x26, 0x26, 0x28, 0x27, "s_bfe_i32"),
392 (0x29, 0x29, 0x27, 0x27, 0x29, 0x28, "s_bfe_u64"),
393 (0x2a, 0x2a, 0x28, 0x28, 0x2a, 0x29, "s_bfe_i64"),
394 (0x2b, 0x2b, 0x29, 0x29, -1, -1, "s_cbranch_g_fork", InstrClass.Branch),
395 (0x2c, 0x2c, 0x2a, 0x2a, 0x2c, 0x06, "s_absdiff_i32"),
396 ( -1, -1, 0x2b, 0x2b, -1, -1, "s_rfe_restore_b64", InstrClass.Branch),
397 ( -1, -1, -1, 0x2e, 0x2e, 0x0e, "s_lshl1_add_u32"),
398 ( -1, -1, -1, 0x2f, 0x2f, 0x0f, "s_lshl2_add_u32"),
399 ( -1, -1, -1, 0x30, 0x30, 0x10, "s_lshl3_add_u32"),
400 ( -1, -1, -1, 0x31, 0x31, 0x11, "s_lshl4_add_u32"),
401 ( -1, -1, -1, 0x32, 0x32, 0x32, "s_pack_ll_b32_b16"),
402 ( -1, -1, -1, 0x33, 0x33, 0x33, "s_pack_lh_b32_b16"),
403 ( -1, -1, -1, 0x34, 0x34, 0x34, "s_pack_hh_b32_b16"),
404 ( -1, -1, -1, -1, -1, 0x35, "s_pack_hl_b32_b16"),
405 ( -1, -1, -1, 0x2c, 0x35, 0x2d, "s_mul_hi_u32"),
406 ( -1, -1, -1, 0x2d, 0x36, 0x2e, "s_mul_hi_i32"),
407 # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.
408 ( -1, -1, -1, -1, -1, -1, "p_constaddr_addlo"),
409 ( -1, -1, -1, -1, -1, -1, "p_resumeaddr_addlo"),
# Register every SOP2 row through opcode(). The GFX6 and GFX8 columns are
# unpacked only to match the row layout; opcode() receives the GFX7, GFX9,
# GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOP2, InstrClass.Salu):
    opcode(name, opcode_gfx7 = gfx7, opcode_gfx9 = gfx9, opcode_gfx10 = gfx10,
           opcode_gfx11 = gfx11, format = Format.SOP2, cls = cls)
415 # SOPK instructions: 0 input (+ imm), 1 output + optional scc
417 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
418 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32"),
419 ( -1, -1, -1, -1, 0x01, 0x01, "s_version"),
420 (0x02, 0x02, 0x01, 0x01, 0x02, 0x02, "s_cmovk_i32"),
421 (0x03, 0x03, 0x02, 0x02, 0x03, 0x03, "s_cmpk_eq_i32"),
422 (0x04, 0x04, 0x03, 0x03, 0x04, 0x04, "s_cmpk_lg_i32"),
423 (0x05, 0x05, 0x04, 0x04, 0x05, 0x05, "s_cmpk_gt_i32"),
424 (0x06, 0x06, 0x05, 0x05, 0x06, 0x06, "s_cmpk_ge_i32"),
425 (0x07, 0x07, 0x06, 0x06, 0x07, 0x07, "s_cmpk_lt_i32"),
426 (0x08, 0x08, 0x07, 0x07, 0x08, 0x08, "s_cmpk_le_i32"),
427 (0x09, 0x09, 0x08, 0x08, 0x09, 0x09, "s_cmpk_eq_u32"),
428 (0x0a, 0x0a, 0x09, 0x09, 0x0a, 0x0a, "s_cmpk_lg_u32"),
429 (0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, "s_cmpk_gt_u32"),
430 (0x0c, 0x0c, 0x0b, 0x0b, 0x0c, 0x0c, "s_cmpk_ge_u32"),
431 (0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d, "s_cmpk_lt_u32"),
432 (0x0e, 0x0e, 0x0d, 0x0d, 0x0e, 0x0e, "s_cmpk_le_u32"),
433 (0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f, "s_addk_i32"),
434 (0x10, 0x10, 0x0f, 0x0f, 0x10, 0x10, "s_mulk_i32"),
435 (0x11, 0x11, 0x10, 0x10, -1, -1, "s_cbranch_i_fork", InstrClass.Branch),
436 (0x12, 0x12, 0x11, 0x11, 0x12, 0x11, "s_getreg_b32"),
437 (0x13, 0x13, 0x12, 0x12, 0x13, 0x12, "s_setreg_b32"),
438 (0x15, 0x15, 0x14, 0x14, 0x15, 0x13, "s_setreg_imm32_b32"), # requires 32bit literal
439 ( -1, -1, 0x15, 0x15, 0x16, 0x14, "s_call_b64", InstrClass.Branch),
440 ( -1, -1, -1, -1, 0x17, 0x18, "s_waitcnt_vscnt", InstrClass.Waitcnt),
441 ( -1, -1, -1, -1, 0x18, 0x19, "s_waitcnt_vmcnt", InstrClass.Waitcnt),
442 ( -1, -1, -1, -1, 0x19, 0x1a, "s_waitcnt_expcnt", InstrClass.Waitcnt),
443 ( -1, -1, -1, -1, 0x1a, 0x1b, "s_waitcnt_lgkmcnt", InstrClass.Waitcnt),
444 ( -1, -1, -1, -1, 0x1b, 0x16, "s_subvector_loop_begin", InstrClass.Branch),
445 ( -1, -1, -1, -1, 0x1c, 0x17, "s_subvector_loop_end", InstrClass.Branch),
# Register every SOPK row through opcode(). The GFX6 and GFX8 columns are
# unpacked only to match the row layout; opcode() receives the GFX7, GFX9,
# GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOPK, InstrClass.Salu):
    opcode(name, opcode_gfx7 = gfx7, opcode_gfx9 = gfx9, opcode_gfx10 = gfx10,
           opcode_gfx11 = gfx11, format = Format.SOPK, cls = cls)
451 # SOP1 instructions: 1 input, 1 output (+optional SCC)
453 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
454 (0x03, 0x03, 0x00, 0x00, 0x03, 0x00, "s_mov_b32"),
455 (0x04, 0x04, 0x01, 0x01, 0x04, 0x01, "s_mov_b64"),
456 (0x05, 0x05, 0x02, 0x02, 0x05, 0x02, "s_cmov_b32"),
457 (0x06, 0x06, 0x03, 0x03, 0x06, 0x03, "s_cmov_b64"),
458 (0x07, 0x07, 0x04, 0x04, 0x07, 0x1e, "s_not_b32"),
459 (0x08, 0x08, 0x05, 0x05, 0x08, 0x1f, "s_not_b64"),
460 (0x09, 0x09, 0x06, 0x06, 0x09, 0x1c, "s_wqm_b32"),
461 (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x1d, "s_wqm_b64"),
462 (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x04, "s_brev_b32"),
463 (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x05, "s_brev_b64"),
464 (0x0d, 0x0d, 0x0a, 0x0a, 0x0d, 0x16, "s_bcnt0_i32_b32"),
465 (0x0e, 0x0e, 0x0b, 0x0b, 0x0e, 0x17, "s_bcnt0_i32_b64"),
466 (0x0f, 0x0f, 0x0c, 0x0c, 0x0f, 0x18, "s_bcnt1_i32_b32"),
467 (0x10, 0x10, 0x0d, 0x0d, 0x10, 0x19, "s_bcnt1_i32_b64"),
468 (0x11, 0x11, 0x0e, 0x0e, 0x11, -1, "s_ff0_i32_b32"),
469 (0x12, 0x12, 0x0f, 0x0f, 0x12, -1, "s_ff0_i32_b64"),
470 (0x13, 0x13, 0x10, 0x10, 0x13, 0x08, "s_ff1_i32_b32"), #s_ctz_i32_b32 in GFX11
471 (0x14, 0x14, 0x11, 0x11, 0x14, 0x09, "s_ff1_i32_b64"), #s_ctz_i32_b64 in GFX11
472 (0x15, 0x15, 0x12, 0x12, 0x15, 0x0a, "s_flbit_i32_b32"), #s_clz_i32_u32 in GFX11
473 (0x16, 0x16, 0x13, 0x13, 0x16, 0x0b, "s_flbit_i32_b64"), #s_clz_i32_u64 in GFX11
474 (0x17, 0x17, 0x14, 0x14, 0x17, 0x0c, "s_flbit_i32"), #s_cls_i32 in GFX11
475 (0x18, 0x18, 0x15, 0x15, 0x18, 0x0d, "s_flbit_i32_i64"), #s_cls_i32_i64 in GFX11
476 (0x19, 0x19, 0x16, 0x16, 0x19, 0x0e, "s_sext_i32_i8"),
477 (0x1a, 0x1a, 0x17, 0x17, 0x1a, 0x0f, "s_sext_i32_i16"),
478 (0x1b, 0x1b, 0x18, 0x18, 0x1b, 0x10, "s_bitset0_b32"),
479 (0x1c, 0x1c, 0x19, 0x19, 0x1c, 0x11, "s_bitset0_b64"),
480 (0x1d, 0x1d, 0x1a, 0x1a, 0x1d, 0x12, "s_bitset1_b32"),
481 (0x1e, 0x1e, 0x1b, 0x1b, 0x1e, 0x13, "s_bitset1_b64"),
482 (0x1f, 0x1f, 0x1c, 0x1c, 0x1f, 0x47, "s_getpc_b64"),
483 (0x20, 0x20, 0x1d, 0x1d, 0x20, 0x48, "s_setpc_b64", InstrClass.Branch),
484 (0x21, 0x21, 0x1e, 0x1e, 0x21, 0x49, "s_swappc_b64", InstrClass.Branch),
485 (0x22, 0x22, 0x1f, 0x1f, 0x22, 0x4a, "s_rfe_b64", InstrClass.Branch),
486 (0x24, 0x24, 0x20, 0x20, 0x24, 0x21, "s_and_saveexec_b64"),
487 (0x25, 0x25, 0x21, 0x21, 0x25, 0x23, "s_or_saveexec_b64"),
488 (0x26, 0x26, 0x22, 0x22, 0x26, 0x25, "s_xor_saveexec_b64"),
489 (0x27, 0x27, 0x23, 0x23, 0x27, 0x31, "s_andn2_saveexec_b64"), #s_and_not1_saveexec_b64 in GFX11
490 (0x28, 0x28, 0x24, 0x24, 0x28, 0x33, "s_orn2_saveexec_b64"), #s_or_not1_saveexec_b64 in GFX11
491 (0x29, 0x29, 0x25, 0x25, 0x29, 0x27, "s_nand_saveexec_b64"),
492 (0x2a, 0x2a, 0x26, 0x26, 0x2a, 0x29, "s_nor_saveexec_b64"),
493 (0x2b, 0x2b, 0x27, 0x27, 0x2b, 0x2b, "s_xnor_saveexec_b64"),
494 (0x2c, 0x2c, 0x28, 0x28, 0x2c, 0x1a, "s_quadmask_b32"),
495 (0x2d, 0x2d, 0x29, 0x29, 0x2d, 0x1b, "s_quadmask_b64"),
496 (0x2e, 0x2e, 0x2a, 0x2a, 0x2e, 0x40, "s_movrels_b32"),
497 (0x2f, 0x2f, 0x2b, 0x2b, 0x2f, 0x41, "s_movrels_b64"),
498 (0x30, 0x30, 0x2c, 0x2c, 0x30, 0x42, "s_movreld_b32"),
499 (0x31, 0x31, 0x2d, 0x2d, 0x31, 0x43, "s_movreld_b64"),
500 (0x32, 0x32, 0x2e, 0x2e, -1, -1, "s_cbranch_join", InstrClass.Branch),
501 (0x34, 0x34, 0x30, 0x30, 0x34, 0x15, "s_abs_i32"),
502 (0x35, 0x35, -1, -1, 0x35, -1, "s_mov_fed_b32"),
503 ( -1, -1, 0x32, 0x32, -1, -1, "s_set_gpr_idx_idx"),
504 ( -1, -1, -1, 0x33, 0x37, 0x2d, "s_andn1_saveexec_b64"), #s_and_not0_savexec_b64 in GFX11
505 ( -1, -1, -1, 0x34, 0x38, 0x2f, "s_orn1_saveexec_b64"), #s_or_not0_savexec_b64 in GFX11
506 ( -1, -1, -1, 0x35, 0x39, 0x35, "s_andn1_wrexec_b64"), #s_and_not0_wrexec_b64 in GFX11
507 ( -1, -1, -1, 0x36, 0x3a, 0x37, "s_andn2_wrexec_b64"), #s_and_not1_wrexec_b64 in GFX11
508 ( -1, -1, -1, 0x37, 0x3b, 0x14, "s_bitreplicate_b64_b32"),
509 ( -1, -1, -1, -1, 0x3c, 0x20, "s_and_saveexec_b32"),
510 ( -1, -1, -1, -1, 0x3d, 0x22, "s_or_saveexec_b32"),
511 ( -1, -1, -1, -1, 0x3e, 0x24, "s_xor_saveexec_b32"),
512 ( -1, -1, -1, -1, 0x3f, 0x30, "s_andn2_saveexec_b32"), #s_and_not1_saveexec_b32 in GFX11
513 ( -1, -1, -1, -1, 0x40, 0x32, "s_orn2_saveexec_b32"), #s_or_not1_saveexec_b32 in GFX11
514 ( -1, -1, -1, -1, 0x41, 0x26, "s_nand_saveexec_b32"),
515 ( -1, -1, -1, -1, 0x42, 0x28, "s_nor_saveexec_b32"),
516 ( -1, -1, -1, -1, 0x43, 0x2a, "s_xnor_saveexec_b32"),
517 ( -1, -1, -1, -1, 0x44, 0x2c, "s_andn1_saveexec_b32"), #s_and_not0_savexec_b32 in GFX11
518 ( -1, -1, -1, -1, 0x45, 0x2e, "s_orn1_saveexec_b32"), #s_or_not0_savexec_b32 in GFX11
519 ( -1, -1, -1, -1, 0x46, 0x34, "s_andn1_wrexec_b32"), #s_and_not0_wrexec_b32 in GFX11
520 ( -1, -1, -1, -1, 0x47, 0x36, "s_andn2_wrexec_b32"), #s_and_not1_wrexec_b32 in GFX11
521 ( -1, -1, -1, -1, 0x49, 0x44, "s_movrelsd_2_b32"),
522 ( -1, -1, -1, -1, -1, 0x4c, "s_sendmsg_rtn_b32"),
523 ( -1, -1, -1, -1, -1, 0x4d, "s_sendmsg_rtn_b64"),
524 # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
525 ( -1, -1, -1, -1, -1, -1, "p_constaddr_getpc"),
526 ( -1, -1, -1, -1, -1, -1, "p_resumeaddr_getpc"),
527 ( -1, -1, -1, -1, -1, -1, "p_load_symbol"),
# Register every SOP1 row through opcode(). The GFX6 and GFX8 columns are
# unpacked only to match the row layout; opcode() receives the GFX7, GFX9,
# GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOP1, InstrClass.Salu):
    opcode(name, opcode_gfx7 = gfx7, opcode_gfx9 = gfx9, opcode_gfx10 = gfx10,
           opcode_gfx11 = gfx11, format = Format.SOP1, cls = cls)
533 # SOPC instructions: 2 inputs and 0 outputs (+SCC)
535 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
536 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32"),
537 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32"),
538 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32"),
539 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32"),
540 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32"),
541 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32"),
542 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32"),
543 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32"),
544 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32"),
545 (0x09, 0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32"),
546 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32"),
547 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32"),
548 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32"),
549 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32"),
550 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64"),
551 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64"),
552 (0x10, 0x10, 0x10, 0x10, -1, -1, "s_setvskip"),
553 ( -1, -1, 0x11, 0x11, -1, -1, "s_set_gpr_idx_on"),
554 ( -1, -1, 0x12, 0x12, 0x12, 0x10, "s_cmp_eq_u64"),
555 ( -1, -1, 0x13, 0x13, 0x13, 0x11, "s_cmp_lg_u64"),
# Register every SOPC row through opcode(), always with class InstrClass.Salu
# (SOPC rows carry no per-row class column). The GFX6 and GFX8 columns are
# unpacked only to match the row layout.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in SOPC:
    opcode(name, opcode_gfx7 = gfx7, opcode_gfx9 = gfx9, opcode_gfx10 = gfx10,
           opcode_gfx11 = gfx11, format = Format.SOPC, cls = InstrClass.Salu)
561 # SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs
563 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
564 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"),
565 (0x01, 0x01, 0x01, 0x01, 0x01, 0x30, "s_endpgm"),
566 (0x02, 0x02, 0x02, 0x02, 0x02, 0x20, "s_branch", InstrClass.Branch),
567 ( -1, -1, 0x03, 0x03, 0x03, 0x34, "s_wakeup"),
568 (0x04, 0x04, 0x04, 0x04, 0x04, 0x21, "s_cbranch_scc0", InstrClass.Branch),
569 (0x05, 0x05, 0x05, 0x05, 0x05, 0x22, "s_cbranch_scc1", InstrClass.Branch),
570 (0x06, 0x06, 0x06, 0x06, 0x06, 0x23, "s_cbranch_vccz", InstrClass.Branch),
571 (0x07, 0x07, 0x07, 0x07, 0x07, 0x24, "s_cbranch_vccnz", InstrClass.Branch),
572 (0x08, 0x08, 0x08, 0x08, 0x08, 0x25, "s_cbranch_execz", InstrClass.Branch),
573 (0x09, 0x09, 0x09, 0x09, 0x09, 0x26, "s_cbranch_execnz", InstrClass.Branch),
574 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x3d, "s_barrier", InstrClass.Barrier),
575 ( -1, 0x0b, 0x0b, 0x0b, 0x0b, 0x01, "s_setkill"),
576 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x09, "s_waitcnt", InstrClass.Waitcnt),
577 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x02, "s_sethalt"),
578 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x03, "s_sleep"),
579 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x35, "s_setprio"),
580 (0x10, 0x10, 0x10, 0x10, 0x10, 0x36, "s_sendmsg", InstrClass.Sendmsg),
581 (0x11, 0x11, 0x11, 0x11, 0x11, 0x37, "s_sendmsghalt", InstrClass.Sendmsg),
582 (0x12, 0x12, 0x12, 0x12, 0x12, 0x10, "s_trap", InstrClass.Branch),
583 (0x13, 0x13, 0x13, 0x13, 0x13, 0x3c, "s_icache_inv"),
584 (0x14, 0x14, 0x14, 0x14, 0x14, 0x38, "s_incperflevel"),
585 (0x15, 0x15, 0x15, 0x15, 0x15, 0x39, "s_decperflevel"),
586 (0x16, 0x16, 0x16, 0x16, 0x16, 0x3a, "s_ttracedata"),
587 ( -1, 0x17, 0x17, 0x17, 0x17, 0x27, "s_cbranch_cdbgsys", InstrClass.Branch),
588 ( -1, 0x18, 0x18, 0x18, 0x18, 0x28, "s_cbranch_cdbguser", InstrClass.Branch),
589 ( -1, 0x19, 0x19, 0x19, 0x19, 0x29, "s_cbranch_cdbgsys_or_user", InstrClass.Branch),
590 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, 0x2a, "s_cbranch_cdbgsys_and_user", InstrClass.Branch),
591 ( -1, -1, 0x1b, 0x1b, 0x1b, 0x31, "s_endpgm_saved"),
592 ( -1, -1, 0x1c, 0x1c, -1, -1, "s_set_gpr_idx_off"),
593 ( -1, -1, 0x1d, 0x1d, -1, -1, "s_set_gpr_idx_mode"),
594 ( -1, -1, -1, 0x1e, 0x1e, -1, "s_endpgm_ordered_ps_done"),
595 ( -1, -1, -1, -1, 0x1f, 0x1f, "s_code_end"),
596 ( -1, -1, -1, -1, 0x20, 0x04, "s_inst_prefetch"), #s_set_inst_prefetch_distance in GFX11
597 ( -1, -1, -1, -1, 0x21, 0x05, "s_clause"),
598 ( -1, -1, -1, -1, 0x22, 0x0a, "s_wait_idle", InstrClass.Waitcnt),
599 ( -1, -1, -1, -1, 0x23, 0x08, "s_waitcnt_depctr", InstrClass.Waitcnt),
600 ( -1, -1, -1, -1, 0x24, 0x11, "s_round_mode"),
601 ( -1, -1, -1, -1, 0x25, 0x12, "s_denorm_mode"),
602 ( -1, -1, -1, -1, 0x26, 0x3b, "s_ttracedata_imm"),
603 ( -1, -1, -1, -1, -1, 0x07, "s_delay_alu", InstrClass.Waitcnt),
604 ( -1, -1, -1, -1, -1, 0x0b, "s_wait_event"),
# Register every SOPP row through opcode(). The GFX6 and GFX8 columns are
# unpacked only to match the row layout; opcode() receives the GFX7, GFX9,
# GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOPP, InstrClass.Salu):
    opcode(name, opcode_gfx7 = gfx7, opcode_gfx9 = gfx9, opcode_gfx10 = gfx10,
           opcode_gfx11 = gfx11, format = Format.SOPP, cls = cls)
610 # SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output
611 # Unlike GFX10, GFX10.3 does not have SMEM store, atomic or scratch instructions
613 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
614 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"), #s_load_b32 in GFX11
615 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"), #s_load_b64 in GFX11
616 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"), #s_load_b128 in GFX11
617 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"), #s_load_b256 in GFX11
618 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"), #s_load_b512 in GFX11
619 ( -1, -1, -1, 0x05, 0x05, -1, "s_scratch_load_dword"),
620 ( -1, -1, -1, 0x06, 0x06, -1, "s_scratch_load_dwordx2"),
621 ( -1, -1, -1, 0x07, 0x07, -1, "s_scratch_load_dwordx4"),
622 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"), #s_buffer_load_b32 in GFX11
623 (0x09, 0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"), #s_buffer_load_b64 in GFX11
624 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"), #s_buffer_load_b128 in GFX11
625 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"), #s_buffer_load_b256 in GFX11
626 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"), #s_buffer_load_b512 in GFX11
627 ( -1, -1, 0x10, 0x10, 0x10, -1, "s_store_dword"),
628 ( -1, -1, 0x11, 0x11, 0x11, -1, "s_store_dwordx2"),
629 ( -1, -1, 0x12, 0x12, 0x12, -1, "s_store_dwordx4"),
630 ( -1, -1, -1, 0x15, 0x15, -1, "s_scratch_store_dword"),
631 ( -1, -1, -1, 0x16, 0x16, -1, "s_scratch_store_dwordx2"),
632 ( -1, -1, -1, 0x17, 0x17, -1, "s_scratch_store_dwordx4"),
633 ( -1, -1, 0x18, 0x18, 0x18, -1, "s_buffer_store_dword"),
634 ( -1, -1, 0x19, 0x19, 0x19, -1, "s_buffer_store_dwordx2"),
635 ( -1, -1, 0x1a, 0x1a, 0x1a, -1, "s_buffer_store_dwordx4"),
636 ( -1, -1, 0x1f, 0x1f, 0x1f, 0x20, "s_gl1_inv"),
637 (0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21, "s_dcache_inv"),
638 ( -1, -1, 0x21, 0x21, 0x21, -1, "s_dcache_wb"),
639 ( -1, 0x1d, 0x22, 0x22, -1, -1, "s_dcache_inv_vol"),
640 ( -1, -1, 0x23, 0x23, -1, -1, "s_dcache_wb_vol"),
641 (0x1e, 0x1e, 0x24, 0x24, 0x24, -1, "s_memtime"), #GFX6-GFX10
642 ( -1, -1, 0x25, 0x25, 0x25, -1, "s_memrealtime"),
643 ( -1, -1, 0x26, 0x26, 0x26, 0x22, "s_atc_probe"),
644 ( -1, -1, 0x27, 0x27, 0x27, 0x23, "s_atc_probe_buffer"),
645 ( -1, -1, -1, 0x28, 0x28, -1, "s_dcache_discard"),
646 ( -1, -1, -1, 0x29, 0x29, -1, "s_dcache_discard_x2"),
647 ( -1, -1, -1, -1, 0x2a, -1, "s_get_waveid_in_workgroup"),
648 ( -1, -1, -1, 0x40, 0x40, -1, "s_buffer_atomic_swap"),
649 ( -1, -1, -1, 0x41, 0x41, -1, "s_buffer_atomic_cmpswap"),
650 ( -1, -1, -1, 0x42, 0x42, -1, "s_buffer_atomic_add"),
651 ( -1, -1, -1, 0x43, 0x43, -1, "s_buffer_atomic_sub"),
652 ( -1, -1, -1, 0x44, 0x44, -1, "s_buffer_atomic_smin"),
653 ( -1, -1, -1, 0x45, 0x45, -1, "s_buffer_atomic_umin"),
654 ( -1, -1, -1, 0x46, 0x46, -1, "s_buffer_atomic_smax"),
655 ( -1, -1, -1, 0x47, 0x47, -1, "s_buffer_atomic_umax"),
656 ( -1, -1, -1, 0x48, 0x48, -1, "s_buffer_atomic_and"),
657 ( -1, -1, -1, 0x49, 0x49, -1, "s_buffer_atomic_or"),
658 ( -1, -1, -1, 0x4a, 0x4a, -1, "s_buffer_atomic_xor"),
659 ( -1, -1, -1, 0x4b, 0x4b, -1, "s_buffer_atomic_inc"),
660 ( -1, -1, -1, 0x4c, 0x4c, -1, "s_buffer_atomic_dec"),
661 ( -1, -1, -1, 0x60, 0x60, -1, "s_buffer_atomic_swap_x2"),
662 ( -1, -1, -1, 0x61, 0x61, -1, "s_buffer_atomic_cmpswap_x2"),
663 ( -1, -1, -1, 0x62, 0x62, -1, "s_buffer_atomic_add_x2"),
664 ( -1, -1, -1, 0x63, 0x63, -1, "s_buffer_atomic_sub_x2"),
665 ( -1, -1, -1, 0x64, 0x64, -1, "s_buffer_atomic_smin_x2"),
666 ( -1, -1, -1, 0x65, 0x65, -1, "s_buffer_atomic_umin_x2"),
667 ( -1, -1, -1, 0x66, 0x66, -1, "s_buffer_atomic_smax_x2"),
668 ( -1, -1, -1, 0x67, 0x67, -1, "s_buffer_atomic_umax_x2"),
669 ( -1, -1, -1, 0x68, 0x68, -1, "s_buffer_atomic_and_x2"),
670 ( -1, -1, -1, 0x69, 0x69, -1, "s_buffer_atomic_or_x2"),
671 ( -1, -1, -1, 0x6a, 0x6a, -1, "s_buffer_atomic_xor_x2"),
672 ( -1, -1, -1, 0x6b, 0x6b, -1, "s_buffer_atomic_inc_x2"),
673 ( -1, -1, -1, 0x6c, 0x6c, -1, "s_buffer_atomic_dec_x2"),
674 ( -1, -1, -1, 0x80, 0x80, -1, "s_atomic_swap"),
675 ( -1, -1, -1, 0x81, 0x81, -1, "s_atomic_cmpswap"),
676 ( -1, -1, -1, 0x82, 0x82, -1, "s_atomic_add"),
677 ( -1, -1, -1, 0x83, 0x83, -1, "s_atomic_sub"),
678 ( -1, -1, -1, 0x84, 0x84, -1, "s_atomic_smin"),
679 ( -1, -1, -1, 0x85, 0x85, -1, "s_atomic_umin"),
680 ( -1, -1, -1, 0x86, 0x86, -1, "s_atomic_smax"),
681 ( -1, -1, -1, 0x87, 0x87, -1, "s_atomic_umax"),
682 ( -1, -1, -1, 0x88, 0x88, -1, "s_atomic_and"),
683 ( -1, -1, -1, 0x89, 0x89, -1, "s_atomic_or"),
684 ( -1, -1, -1, 0x8a, 0x8a, -1, "s_atomic_xor"),
685 ( -1, -1, -1, 0x8b, 0x8b, -1, "s_atomic_inc"),
686 ( -1, -1, -1, 0x8c, 0x8c, -1, "s_atomic_dec"),
687 ( -1, -1, -1, 0xa0, 0xa0, -1, "s_atomic_swap_x2"),
688 ( -1, -1, -1, 0xa1, 0xa1, -1, "s_atomic_cmpswap_x2"),
689 ( -1, -1, -1, 0xa2, 0xa2, -1, "s_atomic_add_x2"),
690 ( -1, -1, -1, 0xa3, 0xa3, -1, "s_atomic_sub_x2"),
691 ( -1, -1, -1, 0xa4, 0xa4, -1, "s_atomic_smin_x2"),
692 ( -1, -1, -1, 0xa5, 0xa5, -1, "s_atomic_umin_x2"),
693 ( -1, -1, -1, 0xa6, 0xa6, -1, "s_atomic_smax_x2"),
694 ( -1, -1, -1, 0xa7, 0xa7, -1, "s_atomic_umax_x2"),
695 ( -1, -1, -1, 0xa8, 0xa8, -1, "s_atomic_and_x2"),
696 ( -1, -1, -1, 0xa9, 0xa9, -1, "s_atomic_or_x2"),
697 ( -1, -1, -1, 0xaa, 0xaa, -1, "s_atomic_xor_x2"),
698 ( -1, -1, -1, 0xab, 0xab, -1, "s_atomic_inc_x2"),
699 ( -1, -1, -1, 0xac, 0xac, -1, "s_atomic_dec_x2"),
# Register every SMEM row through opcode() with class InstrClass.SMem.
# An opcode is flagged atomic when its name contains the substring "atomic".
# The GFX6 and GFX8 columns are unpacked only to match the row layout.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in SMEM:
    opcode(name, opcode_gfx7 = gfx7, opcode_gfx9 = gfx9, opcode_gfx10 = gfx10,
           opcode_gfx11 = gfx11, format = Format.SMEM, cls = InstrClass.SMem,
           is_atomic = ("atomic" in name))
705 # VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
706 # TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8
708 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input modifiers, output modifiers
709 (0x00, 0x00, 0x00, 0x00, 0x01, 0x01, "v_cndmask_b32", True, False),
710 (0x01, 0x01, -1, -1, -1, -1, "v_readlane_b32", False, False),
711 (0x02, 0x02, -1, -1, -1, -1, "v_writelane_b32", False, False),
712 (0x03, 0x03, 0x01, 0x01, 0x03, 0x03, "v_add_f32", True, True),
713 (0x04, 0x04, 0x02, 0x02, 0x04, 0x04, "v_sub_f32", True, True),
714 (0x05, 0x05, 0x03, 0x03, 0x05, 0x05, "v_subrev_f32", True, True),
715 (0x06, 0x06, -1, -1, 0x06, -1, "v_mac_legacy_f32", True, True), #GFX6,7,10
716 ( -1, -1, -1, -1, 0x06, 0x06, "v_fmac_legacy_f32", True, True), #GFX10.3+, v_fmac_dx9_zero_f32 in GFX11
717 (0x07, 0x07, 0x04, 0x04, 0x07, 0x07, "v_mul_legacy_f32", True, True), #v_mul_dx9_zero_f32 in GFX11
718 (0x08, 0x08, 0x05, 0x05, 0x08, 0x08, "v_mul_f32", True, True),
719 (0x09, 0x09, 0x06, 0x06, 0x09, 0x09, "v_mul_i32_i24", False, False),
720 (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x0a, "v_mul_hi_i32_i24", False, False),
721 (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x0b, "v_mul_u32_u24", False, False),
722 (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x0c, "v_mul_hi_u32_u24", False, False),
723 ( -1, -1, -1, 0x39, 0x0d, -1, "v_dot4c_i32_i8", False, False),
724 (0x0d, 0x0d, -1, -1, -1, -1, "v_min_legacy_f32", True, True),
725 (0x0e, 0x0e, -1, -1, -1, -1, "v_max_legacy_f32", True, True),
726 (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, 0x0f, "v_min_f32", True, True),
727 (0x10, 0x10, 0x0b, 0x0b, 0x10, 0x10, "v_max_f32", True, True),
728 (0x11, 0x11, 0x0c, 0x0c, 0x11, 0x11, "v_min_i32", False, False),
729 (0x12, 0x12, 0x0d, 0x0d, 0x12, 0x12, "v_max_i32", False, False),
730 (0x13, 0x13, 0x0e, 0x0e, 0x13, 0x13, "v_min_u32", False, False),
731 (0x14, 0x14, 0x0f, 0x0f, 0x14, 0x14, "v_max_u32", False, False),
732 (0x15, 0x15, -1, -1, -1, -1, "v_lshr_b32", False, False),
733 (0x16, 0x16, 0x10, 0x10, 0x16, 0x19, "v_lshrrev_b32", False, False),
734 (0x17, 0x17, -1, -1, -1, -1, "v_ashr_i32", False, False),
735 (0x18, 0x18, 0x11, 0x11, 0x18, 0x1a, "v_ashrrev_i32", False, False),
736 (0x19, 0x19, -1, -1, -1, -1, "v_lshl_b32", False, False),
737 (0x1a, 0x1a, 0x12, 0x12, 0x1a, 0x18, "v_lshlrev_b32", False, False),
738 (0x1b, 0x1b, 0x13, 0x13, 0x1b, 0x1b, "v_and_b32", False, False),
739 (0x1c, 0x1c, 0x14, 0x14, 0x1c, 0x1c, "v_or_b32", False, False),
740 (0x1d, 0x1d, 0x15, 0x15, 0x1d, 0x1d, "v_xor_b32", False, False),
741 ( -1, -1, -1, -1, 0x1e, 0x1e, "v_xnor_b32", False, False),
742 (0x1f, 0x1f, 0x16, 0x16, 0x1f, -1, "v_mac_f32", True, True),
743 (0x20, 0x20, 0x17, 0x17, 0x20, -1, "v_madmk_f32", False, False),
744 (0x21, 0x21, 0x18, 0x18, 0x21, -1, "v_madak_f32", False, False),
745 (0x24, 0x24, -1, -1, -1, -1, "v_mbcnt_hi_u32_b32", False, False),
746 (0x25, 0x25, 0x19, 0x19, -1, -1, "v_add_co_u32", False, False), # VOP3B only in RDNA
747 (0x26, 0x26, 0x1a, 0x1a, -1, -1, "v_sub_co_u32", False, False), # VOP3B only in RDNA
748 (0x27, 0x27, 0x1b, 0x1b, -1, -1, "v_subrev_co_u32", False, False), # VOP3B only in RDNA
749 (0x28, 0x28, 0x1c, 0x1c, 0x28, 0x20, "v_addc_co_u32", False, False), # v_add_co_ci_u32 in RDNA
750 (0x29, 0x29, 0x1d, 0x1d, 0x29, 0x21, "v_subb_co_u32", False, False), # v_sub_co_ci_u32 in RDNA
751 (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, 0x22, "v_subbrev_co_u32", False, False), # v_subrev_co_ci_u32 in RDNA
752 ( -1, -1, -1, -1, 0x2b, 0x2b, "v_fmac_f32", True, True),
753 ( -1, -1, -1, -1, 0x2c, 0x2c, "v_fmamk_f32", False, False),
754 ( -1, -1, -1, -1, 0x2d, 0x2d, "v_fmaak_f32", False, False),
755 (0x2f, 0x2f, -1, -1, 0x2f, 0x2f, "v_cvt_pkrtz_f16_f32", True, False), #v_cvt_pk_rtz_f16_f32 in GFX11
756 ( -1, -1, 0x1f, 0x1f, 0x32, 0x32, "v_add_f16", True, True),
757 ( -1, -1, 0x20, 0x20, 0x33, 0x33, "v_sub_f16", True, True),
758 ( -1, -1, 0x21, 0x21, 0x34, 0x34, "v_subrev_f16", True, True),
759 ( -1, -1, 0x22, 0x22, 0x35, 0x35, "v_mul_f16", True, True),
760 ( -1, -1, 0x23, 0x23, -1, -1, "v_mac_f16", True, True),
761 ( -1, -1, 0x24, 0x24, -1, -1, "v_madmk_f16", False, False),
762 ( -1, -1, 0x25, 0x25, -1, -1, "v_madak_f16", False, False),
763 ( -1, -1, 0x26, 0x26, -1, -1, "v_add_u16", False, False),
764 ( -1, -1, 0x27, 0x27, -1, -1, "v_sub_u16", False, False),
765 ( -1, -1, 0x28, 0x28, -1, -1, "v_subrev_u16", False, False),
766 ( -1, -1, 0x29, 0x29, -1, -1, "v_mul_lo_u16", False, False),
767 ( -1, -1, 0x2a, 0x2a, -1, -1, "v_lshlrev_b16", False, False),
768 ( -1, -1, 0x2b, 0x2b, -1, -1, "v_lshrrev_b16", False, False),
769 ( -1, -1, 0x2c, 0x2c, -1, -1, "v_ashrrev_i16", False, False),
770 ( -1, -1, 0x2d, 0x2d, 0x39, 0x39, "v_max_f16", True, True),
771 ( -1, -1, 0x2e, 0x2e, 0x3a, 0x3a, "v_min_f16", True, True),
772 ( -1, -1, 0x2f, 0x2f, -1, -1, "v_max_u16", False, False),
773 ( -1, -1, 0x30, 0x30, -1, -1, "v_max_i16", False, False),
774 ( -1, -1, 0x31, 0x31, -1, -1, "v_min_u16", False, False),
775 ( -1, -1, 0x32, 0x32, -1, -1, "v_min_i16", False, False),
776 ( -1, -1, 0x33, 0x33, 0x3b, 0x3b, "v_ldexp_f16", False, True),
777 ( -1, -1, -1, 0x34, 0x25, 0x25, "v_add_u32", False, False), # called v_add_nc_u32 in RDNA
778 ( -1, -1, -1, 0x35, 0x26, 0x26, "v_sub_u32", False, False), # called v_sub_nc_u32 in RDNA
779 ( -1, -1, -1, 0x36, 0x27, 0x27, "v_subrev_u32", False, False), # called v_subrev_nc_u32 in RDNA
780 ( -1, -1, -1, -1, 0x36, 0x36, "v_fmac_f16", True, True),
781 ( -1, -1, -1, -1, 0x37, 0x37, "v_fmamk_f16", False, False),
782 ( -1, -1, -1, -1, 0x38, 0x38, "v_fmaak_f16", False, False),
783 ( -1, -1, -1, -1, 0x3c, 0x3c, "v_pk_fmac_f16", False, False),
784 ( -1, -1, -1, 0x37, 0x02, 0x02, "v_dot2c_f32_f16", False, False), #v_dot2acc_f32_f16 in GFX11
# Register every VOP2 table row as a Valu32 instruction, forwarding the row's
# input/output modifier flags. The GFX6 and GFX8 columns are unpacked but not
# passed to opcode().
for vop2_entry in VOP2:
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod) = vop2_entry
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP2, InstrClass.Valu32, in_mod, out_mod)
790 # VOP1 instructions: instructions with 1 input and 1 output
792 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input_modifiers, output_modifiers
793 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False),
794 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False),
795 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False),
796 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False, InstrClass.ValuDoubleConvert),
797 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True, InstrClass.ValuDoubleConvert),
798 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True),
799 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True),
800 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False),
801 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
802 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
803 ( -1, -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True),
804 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
805 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False), #v_cvt_nearest_i32_f32 in GFX11
806 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),#v_cvt_floor_i32_f32 in GFX11
807 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True),
808 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True, InstrClass.ValuDoubleConvert),
809 (0x10, 0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True, InstrClass.ValuDoubleConvert),
810 (0x11, 0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True),
811 (0x12, 0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True),
812 (0x13, 0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True),
813 (0x14, 0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True),
814 (0x15, 0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False, InstrClass.ValuDoubleConvert),
815 (0x16, 0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True, InstrClass.ValuDoubleConvert),
816 ( -1, 0x17, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True, InstrClass.ValuDouble),
817 ( -1, 0x18, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True, InstrClass.ValuDouble),
818 ( -1, 0x19, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True, InstrClass.ValuDouble),
819 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True, InstrClass.ValuDouble),
820 ( -1, -1, -1, -1, 0x1b, 0x1b, "v_pipeflush", False, False),
821 (0x20, 0x20, 0x1b, 0x1b, 0x20, 0x20, "v_fract_f32", True, True),
822 (0x21, 0x21, 0x1c, 0x1c, 0x21, 0x21, "v_trunc_f32", True, True),
823 (0x22, 0x22, 0x1d, 0x1d, 0x22, 0x22, "v_ceil_f32", True, True),
824 (0x23, 0x23, 0x1e, 0x1e, 0x23, 0x23, "v_rndne_f32", True, True),
825 (0x24, 0x24, 0x1f, 0x1f, 0x24, 0x24, "v_floor_f32", True, True),
826 (0x25, 0x25, 0x20, 0x20, 0x25, 0x25, "v_exp_f32", True, True, InstrClass.ValuTranscendental32),
827 (0x26, 0x26, -1, -1, -1, -1, "v_log_clamp_f32", True, True, InstrClass.ValuTranscendental32),
828 (0x27, 0x27, 0x21, 0x21, 0x27, 0x27, "v_log_f32", True, True, InstrClass.ValuTranscendental32),
829 (0x28, 0x28, -1, -1, -1, -1, "v_rcp_clamp_f32", True, True, InstrClass.ValuTranscendental32),
830 (0x29, 0x29, -1, -1, -1, -1, "v_rcp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
831 (0x2a, 0x2a, 0x22, 0x22, 0x2a, 0x2a, "v_rcp_f32", True, True, InstrClass.ValuTranscendental32),
832 (0x2b, 0x2b, 0x23, 0x23, 0x2b, 0x2b, "v_rcp_iflag_f32", True, True, InstrClass.ValuTranscendental32),
833 (0x2c, 0x2c, -1, -1, -1, -1, "v_rsq_clamp_f32", True, True, InstrClass.ValuTranscendental32),
834 (0x2d, 0x2d, -1, -1, -1, -1, "v_rsq_legacy_f32", True, True, InstrClass.ValuTranscendental32),
835 (0x2e, 0x2e, 0x24, 0x24, 0x2e, 0x2e, "v_rsq_f32", True, True, InstrClass.ValuTranscendental32),
836 (0x2f, 0x2f, 0x25, 0x25, 0x2f, 0x2f, "v_rcp_f64", True, True, InstrClass.ValuDoubleTranscendental),
837 (0x30, 0x30, -1, -1, -1, -1, "v_rcp_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
838 (0x31, 0x31, 0x26, 0x26, 0x31, 0x31, "v_rsq_f64", True, True, InstrClass.ValuDoubleTranscendental),
839 (0x32, 0x32, -1, -1, -1, -1, "v_rsq_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
840 (0x33, 0x33, 0x27, 0x27, 0x33, 0x33, "v_sqrt_f32", True, True, InstrClass.ValuTranscendental32),
841 (0x34, 0x34, 0x28, 0x28, 0x34, 0x34, "v_sqrt_f64", True, True, InstrClass.ValuDoubleTranscendental),
842 (0x35, 0x35, 0x29, 0x29, 0x35, 0x35, "v_sin_f32", True, True, InstrClass.ValuTranscendental32),
843 (0x36, 0x36, 0x2a, 0x2a, 0x36, 0x36, "v_cos_f32", True, True, InstrClass.ValuTranscendental32),
844 (0x37, 0x37, 0x2b, 0x2b, 0x37, 0x37, "v_not_b32", False, False),
845 (0x38, 0x38, 0x2c, 0x2c, 0x38, 0x38, "v_bfrev_b32", False, False),
846 (0x39, 0x39, 0x2d, 0x2d, 0x39, 0x39, "v_ffbh_u32", False, False), #v_clz_i32_u32 in GFX11
847 (0x3a, 0x3a, 0x2e, 0x2e, 0x3a, 0x3a, "v_ffbl_b32", False, False), #v_ctz_i32_b32 in GFX11
848 (0x3b, 0x3b, 0x2f, 0x2f, 0x3b, 0x3b, "v_ffbh_i32", False, False), #v_cls_i32 in GFX11
849 (0x3c, 0x3c, 0x30, 0x30, 0x3c, 0x3c, "v_frexp_exp_i32_f64", True, False, InstrClass.ValuDouble),
850 (0x3d, 0x3d, 0x31, 0x31, 0x3d, 0x3d, "v_frexp_mant_f64", True, False, InstrClass.ValuDouble),
851 (0x3e, 0x3e, 0x32, 0x32, 0x3e, 0x3e, "v_fract_f64", True, True, InstrClass.ValuDouble),
852 (0x3f, 0x3f, 0x33, 0x33, 0x3f, 0x3f, "v_frexp_exp_i32_f32", True, False),
853 (0x40, 0x40, 0x34, 0x34, 0x40, 0x40, "v_frexp_mant_f32", True, False),
854 (0x41, 0x41, 0x35, 0x35, 0x41, -1, "v_clrexcp", False, False),
855 (0x42, 0x42, 0x36, -1, 0x42, 0x42, "v_movreld_b32", False, False),
856 (0x43, 0x43, 0x37, -1, 0x43, 0x43, "v_movrels_b32", False, False),
857 (0x44, 0x44, 0x38, -1, 0x44, 0x44, "v_movrelsd_b32", False, False),
858 ( -1, -1, -1, -1, 0x48, 0x48, "v_movrelsd_2_b32", False, False),
859 ( -1, -1, -1, 0x37, -1, -1, "v_screen_partition_4se_b32", False, False),
860 ( -1, -1, 0x39, 0x39, 0x50, 0x50, "v_cvt_f16_u16", False, True),
861 ( -1, -1, 0x3a, 0x3a, 0x51, 0x51, "v_cvt_f16_i16", False, True),
862 ( -1, -1, 0x3b, 0x3b, 0x52, 0x52, "v_cvt_u16_f16", True, False),
863 ( -1, -1, 0x3c, 0x3c, 0x53, 0x53, "v_cvt_i16_f16", True, False),
864 ( -1, -1, 0x3d, 0x3d, 0x54, 0x54, "v_rcp_f16", True, True, InstrClass.ValuTranscendental32),
865 ( -1, -1, 0x3e, 0x3e, 0x55, 0x55, "v_sqrt_f16", True, True, InstrClass.ValuTranscendental32),
866 ( -1, -1, 0x3f, 0x3f, 0x56, 0x56, "v_rsq_f16", True, True, InstrClass.ValuTranscendental32),
867 ( -1, -1, 0x40, 0x40, 0x57, 0x57, "v_log_f16", True, True, InstrClass.ValuTranscendental32),
868 ( -1, -1, 0x41, 0x41, 0x58, 0x58, "v_exp_f16", True, True, InstrClass.ValuTranscendental32),
869 ( -1, -1, 0x42, 0x42, 0x59, 0x59, "v_frexp_mant_f16", True, False),
870 ( -1, -1, 0x43, 0x43, 0x5a, 0x5a, "v_frexp_exp_i16_f16", True, False),
871 ( -1, -1, 0x44, 0x44, 0x5b, 0x5b, "v_floor_f16", True, True),
872 ( -1, -1, 0x45, 0x45, 0x5c, 0x5c, "v_ceil_f16", True, True),
873 ( -1, -1, 0x46, 0x46, 0x5d, 0x5d, "v_trunc_f16", True, True),
874 ( -1, -1, 0x47, 0x47, 0x5e, 0x5e, "v_rndne_f16", True, True),
875 ( -1, -1, 0x48, 0x48, 0x5f, 0x5f, "v_fract_f16", True, True),
876 ( -1, -1, 0x49, 0x49, 0x60, 0x60, "v_sin_f16", True, True, InstrClass.ValuTranscendental32),
877 ( -1, -1, 0x4a, 0x4a, 0x61, 0x61, "v_cos_f16", True, True, InstrClass.ValuTranscendental32),
878 ( -1, 0x46, 0x4b, 0x4b, -1, -1, "v_exp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
879 ( -1, 0x45, 0x4c, 0x4c, -1, -1, "v_log_legacy_f32", True, True, InstrClass.ValuTranscendental32),
880 ( -1, -1, -1, 0x4f, 0x62, 0x62, "v_sat_pk_u8_i16", False, False),
881 ( -1, -1, -1, 0x4d, 0x63, 0x63, "v_cvt_norm_i16_f16", True, False),
882 ( -1, -1, -1, 0x4e, 0x64, 0x64, "v_cvt_norm_u16_f16", True, False),
883 ( -1, -1, -1, 0x51, 0x65, 0x65, "v_swap_b32", False, False),
884 ( -1, -1, -1, -1, 0x68, 0x68, "v_swaprel_b32", False, False),
885 ( -1, -1, -1, -1, -1, 0x67, "v_permlane64_b32", False, False), #cannot use VOP3
886 ( -1, -1, -1, -1, -1, 0x69, "v_not_b16", False, False),
887 ( -1, -1, -1, -1, -1, 0x6a, "v_cvt_i32_i16", False, False),
888 ( -1, -1, -1, -1, -1, 0x6b, "v_cvt_u32_u16", False, False),
889 ( -1, -1, -1, -1, -1, 0x1c, "v_mov_b16", True, False),
# Register every VOP1 table row. default_class() is given InstrClass.Valu32,
# and each item it produces is unpacked into ten fields, the last of which is
# the instruction class handed to opcode().
for vop1_entry in default_class(VOP1, InstrClass.Valu32):
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, cls) = vop1_entry
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP1, cls, in_mod, out_mod)
898 (0x88, 0x88, 0x10, 0x10, 0x88, 0x7e, "v_cmp_class_f32"),
899 ( -1, -1, 0x14, 0x14, 0x8f, 0x7d, "v_cmp_class_f16"),
900 (0x98, 0x98, 0x11, 0x11, 0x98, 0xfe, "v_cmpx_class_f32"),
901 ( -1, -1, 0x15, 0x15, 0x9f, 0xfd, "v_cmpx_class_f16"),
902 (0xa8, 0xa8, 0x12, 0x12, 0xa8, 0x7f, "v_cmp_class_f64", InstrClass.ValuDouble),
903 (0xb8, 0xb8, 0x13, 0x13, 0xb8, 0xff, "v_cmpx_class_f64", InstrClass.ValuDouble),
# Register the v_cmp(x)_class_* rows. The instruction class comes from
# default_class() (called with InstrClass.Valu32); the two trailing flags
# passed to opcode() are fixed at True, False for every row.
for class_entry in default_class(VOPC_CLASS, InstrClass.Valu32):
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) = class_entry
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, cls, True, False)
COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"]

# 16-bit float compares. Each iteration registers four opcodes, in this order:
# v_cmp/v_cmpx for COMPF[i], then v_cmp/v_cmpx for COMPF[i+8].
# NOTE(review): the bound of 8 is inferred from the COMPF[i+8] indexing and the
# 8-entry spacing between the opcode bases below -- confirm.
for i in range(8):
    lo_cond = COMPF[i]
    hi_cond = COMPF[i+8]
    # Columns: GFX6, GFX7, GFX8, GFX9, GFX10, GFX11, name
    for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in (
        (-1, -1, 0x20+i, 0x20+i, 0xc8+i, 0x00+i, "v_cmp_"+lo_cond+"_f16"),
        (-1, -1, 0x30+i, 0x30+i, 0xd8+i, 0x80+i, "v_cmpx_"+lo_cond+"_f16"),
        (-1, -1, 0x28+i, 0x28+i, 0xe8+i, 0x08+i, "v_cmp_"+hi_cond+"_f16"),
        (-1, -1, 0x38+i, 0x38+i, 0xf8+i, 0x88+i, "v_cmpx_"+hi_cond+"_f16"),
    ):
        opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False)
# 32-bit and 64-bit float compares: for every COMPF condition register
# v_cmp/v_cmpx for f32 (Valu32) followed by v_cmp/v_cmpx for f64 (ValuDouble).
# NOTE(review): the bound of 16 is inferred from COMPF having 16 entries and
# the 0x10 spacing between the opcode bases below -- confirm.
for i in range(16):
    cond = COMPF[i]
    # Columns: GFX6, GFX7, GFX8, GFX9, GFX10, GFX11, name, instruction class
    for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in (
        (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, 0x10+i, "v_cmp_"+cond+"_f32", InstrClass.Valu32),
        (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, 0x90+i, "v_cmpx_"+cond+"_f32", InstrClass.Valu32),
        (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, 0x20+i, "v_cmp_"+cond+"_f64", InstrClass.ValuDouble),
        (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, 0xa0+i, "v_cmpx_"+cond+"_f64", InstrClass.ValuDouble),
    ):
        opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, cls, True, False)
    # GFX6/GFX7-only v_cmps/v_cmpsx encodings, kept for reference only: they
    # are not passed to opcode(), so no opcode is registered for them.
    # NOTE(review): confirm these are meant to stay unregistered.
    #   (0x40+i, 0x40+i, -1, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32")
    #   (0x50+i, 0x50+i, -1, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32")
    #   (0x60+i, 0x60+i, -1, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f64")
    #   (0x70+i, 0x70+i, -1, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f64")
COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]

# 16-bit integer compares for the first and last COMPI conditions only
# (indices 0 and 7, i.e. "f" and "tru"). These rows carry encodings in the
# GFX8/GFX9 columns only; every other column is -1.
for i in (0, 7):
    cond = COMPI[i]
    # Columns: GFX6, GFX7, GFX8, GFX9, GFX10, GFX11, name
    for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in (
        (-1, -1, 0xa0+i, 0xa0+i, -1, -1, "v_cmp_"+cond+"_i16"),
        (-1, -1, 0xb0+i, 0xb0+i, -1, -1, "v_cmpx_"+cond+"_i16"),
        (-1, -1, 0xa8+i, 0xa8+i, -1, -1, "v_cmp_"+cond+"_u16"),
        (-1, -1, 0xb8+i, 0xb8+i, -1, -1, "v_cmpx_"+cond+"_u16"),
    ):
        opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32)
# 16-bit integer compares for COMPI indices 1..6. Unlike the index 0/7 rows,
# these also carry GFX10 and GFX11 encodings.
for i in range(1, 7):
    cond = COMPI[i]
    # Columns: GFX6, GFX7, GFX8, GFX9, GFX10, GFX11, name
    for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in (
        (-1, -1, 0xa0+i, 0xa0+i, 0x88+i, 0x30+i, "v_cmp_"+cond+"_i16"),
        (-1, -1, 0xb0+i, 0xb0+i, 0x98+i, 0xb0+i, "v_cmpx_"+cond+"_i16"),
        (-1, -1, 0xa8+i, 0xa8+i, 0xa8+i, 0x38+i, "v_cmp_"+cond+"_u16"),
        (-1, -1, 0xb8+i, 0xb8+i, 0xb8+i, 0xb8+i, "v_cmpx_"+cond+"_u16"),
    ):
        opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32)
# 32-bit and 64-bit integer compares. Per COMPI condition this registers, in
# order: v_cmp/v_cmpx for i32, i64, u32 and u64. The 32-bit variants use
# InstrClass.Valu32 and the 64-bit variants InstrClass.Valu64.
# NOTE(review): the bound of 8 is inferred from COMPI having 8 entries and the
# 8-entry spacing between the opcode bases below -- confirm.
for i in range(8):
    cond = COMPI[i]
    # Columns: GFX6, GFX7, GFX8, GFX9, GFX10, GFX11, name, instruction class
    for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in (
        (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, 0x40+i, "v_cmp_"+cond+"_i32", InstrClass.Valu32),
        (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, 0xc0+i, "v_cmpx_"+cond+"_i32", InstrClass.Valu32),
        (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, 0x50+i, "v_cmp_"+cond+"_i64", InstrClass.Valu64),
        (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, 0xd0+i, "v_cmpx_"+cond+"_i64", InstrClass.Valu64),
        (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, 0x48+i, "v_cmp_"+cond+"_u32", InstrClass.Valu32),
        (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, 0xc8+i, "v_cmpx_"+cond+"_u32", InstrClass.Valu32),
        (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, 0x58+i, "v_cmp_"+cond+"_u64", InstrClass.Valu64),
        (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, 0xd8+i, "v_cmpx_"+cond+"_u64", InstrClass.Valu64),
    ):
        opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, cls)
977 # VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output
979 # opcode, name, input/output modifiers
980 (0x00, "v_pk_mad_i16", False),
981 (0x01, "v_pk_mul_lo_u16", False),
982 (0x02, "v_pk_add_i16", False),
983 (0x03, "v_pk_sub_i16", False),
984 (0x04, "v_pk_lshlrev_b16", False),
985 (0x05, "v_pk_lshrrev_b16", False),
986 (0x06, "v_pk_ashrrev_i16", False),
987 (0x07, "v_pk_max_i16", False),
988 (0x08, "v_pk_min_i16", False),
989 (0x09, "v_pk_mad_u16", False),
990 (0x0a, "v_pk_add_u16", False),
991 (0x0b, "v_pk_sub_u16", False),
992 (0x0c, "v_pk_max_u16", False),
993 (0x0d, "v_pk_min_u16", False),
994 (0x0e, "v_pk_fma_f16", True),
995 (0x0f, "v_pk_add_f16", True),
996 (0x10, "v_pk_mul_f16", True),
997 (0x11, "v_pk_min_f16", True),
998 (0x12, "v_pk_max_f16", True),
999 (0x20, "v_fma_mix_f32", True), # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA
1000 (0x21, "v_fma_mixlo_f16", True), # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA
1001 (0x22, "v_fma_mixhi_f16", True), # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA
1003 # note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here
1004 # (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, -1, code, code, code, name)
# Register every VOPP table row as a VOP3P / Valu32 instruction. The row's
# single encoding fills the GFX9, GFX10 and GFX11 slots (the GFX7 slot is -1),
# and its single modifiers flag is passed for both modifier arguments.
for vopp_entry in VOPP:
    code, name, modifiers = vopp_entry
    opcode(name, -1, code, code, code, Format.VOP3P, InstrClass.Valu32, modifiers, modifiers)
# Dot-product VOP3P instructions whose encoding differs per generation, so
# they are listed individually. Registration order matches the table order.
# Columns: name, GFX9, GFX10, GFX11 (the GFX7 slot is always -1).
for (name, gfx9, gfx10, gfx11) in (
    ("v_dot2_i32_i16",  0x26, 0x14,   -1),
    ("v_dot2_u32_u16",  0x27, 0x15,   -1),
    ("v_dot4_i32_iu8",    -1,   -1, 0x16),
    ("v_dot4_i32_i8",   0x28, 0x16,   -1),
    ("v_dot4_u32_u8",   0x29, 0x17, 0x17),
    ("v_dot8_i32_iu4",    -1,   -1, 0x18),
    ("v_dot8_u32_u4",   0x2b, 0x19, 0x19),
    ("v_dot2_f32_f16",  0x23, 0x13, 0x13),
    ("v_dot2_f32_bf16",   -1,   -1, 0x1a),
):
    opcode(name, -1, gfx9, gfx10, gfx11, Format.VOP3P, InstrClass.Valu32)
1018 # VINTRP (GFX6 - GFX10.3) instructions:
1020 (0x00, "v_interp_p1_f32"),
1021 (0x01, "v_interp_p2_f32"),
1022 (0x02, "v_interp_mov_f32"),
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, -1, name)
# Register every VINTRP table row as a Valu32 instruction. The row's encoding
# fills the GFX7, GFX9 and GFX10 slots; the GFX11 slot is -1.
for vintrp_entry in VINTRP:
    code, name = vintrp_entry
    opcode(name, code, code, code, -1, Format.VINTRP, InstrClass.Valu32)
1029 # VINTERP (GFX11+) instructions:
1031 (0x00, "v_interp_p10_f32_inreg"),
1032 (0x01, "v_interp_p2_f32_inreg"),
1033 (0x02, "v_interp_p10_f16_f32_inreg"),
1034 (0x03, "v_interp_p2_f16_f32_inreg"),
1035 (0x04, "v_interp_p10_rtz_f16_f32_inreg"),
1036 (0x05, "v_interp_p2_rtz_f16_f32_inreg"),
# Register every VINTERP table row as a VINTERP_INREG / Valu32 instruction.
# Only the GFX11 slot receives the row's encoding; the earlier slots are -1.
for vinterp_entry in VINTERP:
    code, name = vinterp_entry
    opcode(name, -1, -1, -1, code, Format.VINTERP_INREG, InstrClass.Valu32)
1042 # VOP3 instructions: 3 inputs, 1 output
1043 # VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out
1045 (0x140, 0x140, 0x1c0, 0x1c0, 0x140, -1, "v_mad_legacy_f32", True, True), # GFX6-GFX10
1046 (0x141, 0x141, 0x1c1, 0x1c1, 0x141, -1, "v_mad_f32", True, True),
1047 (0x142, 0x142, 0x1c2, 0x1c2, 0x142, 0x20a, "v_mad_i32_i24", False, False),
1048 (0x143, 0x143, 0x1c3, 0x1c3, 0x143, 0x20b, "v_mad_u32_u24", False, False),
1049 (0x144, 0x144, 0x1c4, 0x1c4, 0x144, 0x20c, "v_cubeid_f32", True, True),
1050 (0x145, 0x145, 0x1c5, 0x1c5, 0x145, 0x20d, "v_cubesc_f32", True, True),
1051 (0x146, 0x146, 0x1c6, 0x1c6, 0x146, 0x20e, "v_cubetc_f32", True, True),
1052 (0x147, 0x147, 0x1c7, 0x1c7, 0x147, 0x20f, "v_cubema_f32", True, True),
1053 (0x148, 0x148, 0x1c8, 0x1c8, 0x148, 0x210, "v_bfe_u32", False, False),
1054 (0x149, 0x149, 0x1c9, 0x1c9, 0x149, 0x211, "v_bfe_i32", False, False),
1055 (0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, 0x212, "v_bfi_b32", False, False),
1056 (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, 0x213, "v_fma_f32", True, True, InstrClass.ValuFma),
1057 (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, 0x214, "v_fma_f64", True, True, InstrClass.ValuDouble),
1058 (0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, 0x215, "v_lerp_u8", False, False),
1059 (0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, 0x216, "v_alignbit_b32", False, False),
1060 (0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, 0x217, "v_alignbyte_b32", False, False),
1061 (0x150, 0x150, -1, -1, 0x150, 0x218, "v_mullit_f32", True, True),
1062 (0x151, 0x151, 0x1d0, 0x1d0, 0x151, 0x219, "v_min3_f32", True, True),
1063 (0x152, 0x152, 0x1d1, 0x1d1, 0x152, 0x21a, "v_min3_i32", False, False),
1064 (0x153, 0x153, 0x1d2, 0x1d2, 0x153, 0x21b, "v_min3_u32", False, False),
1065 (0x154, 0x154, 0x1d3, 0x1d3, 0x154, 0x21c, "v_max3_f32", True, True),
1066 (0x155, 0x155, 0x1d4, 0x1d4, 0x155, 0x21d, "v_max3_i32", False, False),
1067 (0x156, 0x156, 0x1d5, 0x1d5, 0x156, 0x21e, "v_max3_u32", False, False),
1068 (0x157, 0x157, 0x1d6, 0x1d6, 0x157, 0x21f, "v_med3_f32", True, True),
1069 (0x158, 0x158, 0x1d7, 0x1d7, 0x158, 0x220, "v_med3_i32", False, False),
1070 (0x159, 0x159, 0x1d8, 0x1d8, 0x159, 0x221, "v_med3_u32", False, False),
1071 (0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, 0x222, "v_sad_u8", False, False),
1072 (0x15b, 0x15b, 0x1da, 0x1da, 0x15b, 0x223, "v_sad_hi_u8", False, False),
1073 (0x15c, 0x15c, 0x1db, 0x1db, 0x15c, 0x224, "v_sad_u16", False, False),
1074 (0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, 0x225, "v_sad_u32", False, False),
1075 (0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, 0x226, "v_cvt_pk_u8_f32", True, False),
1076 (0x15f, 0x15f, 0x1de, 0x1de, 0x15f, 0x227, "v_div_fixup_f32", True, True),
1077 (0x160, 0x160, 0x1df, 0x1df, 0x160, 0x228, "v_div_fixup_f64", True, True),
1078 (0x161, 0x161, -1, -1, -1, -1, "v_lshl_b64", False, False, InstrClass.Valu64),
1079 (0x162, 0x162, -1, -1, -1, -1, "v_lshr_b64", False, False, InstrClass.Valu64),
1080 (0x163, 0x163, -1, -1, -1, -1, "v_ashr_i64", False, False, InstrClass.Valu64),
1081 (0x164, 0x164, 0x280, 0x280, 0x164, 0x327, "v_add_f64", True, True, InstrClass.ValuDoubleAdd),
1082 (0x165, 0x165, 0x281, 0x281, 0x165, 0x328, "v_mul_f64", True, True, InstrClass.ValuDouble),
1083 (0x166, 0x166, 0x282, 0x282, 0x166, 0x329, "v_min_f64", True, True, InstrClass.ValuDouble),
1084 (0x167, 0x167, 0x283, 0x283, 0x167, 0x32a, "v_max_f64", True, True, InstrClass.ValuDouble),
1085 (0x168, 0x168, 0x284, 0x284, 0x168, 0x32b, "v_ldexp_f64", False, True, InstrClass.ValuDouble), # src1 can take input modifiers
1086 (0x169, 0x169, 0x285, 0x285, 0x169, 0x32c, "v_mul_lo_u32", False, False, InstrClass.ValuQuarterRate32),
1087 (0x16a, 0x16a, 0x286, 0x286, 0x16a, 0x32d, "v_mul_hi_u32", False, False, InstrClass.ValuQuarterRate32),
1088 (0x16b, 0x16b, 0x285, 0x285, 0x16b, 0x32c, "v_mul_lo_i32", False, False, InstrClass.ValuQuarterRate32), # identical to v_mul_lo_u32
1089 (0x16c, 0x16c, 0x287, 0x287, 0x16c, 0x32e, "v_mul_hi_i32", False, False, InstrClass.ValuQuarterRate32),
1090 (0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, 0x2fc, "v_div_scale_f32", True, True), # writes to VCC
1091 (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, 0x2fd, "v_div_scale_f64", True, True, InstrClass.ValuDouble), # writes to VCC
1092 (0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, 0x237, "v_div_fmas_f32", True, True), # takes VCC input
1093 (0x170, 0x170, 0x1e3, 0x1e3, 0x170, 0x238, "v_div_fmas_f64", True, True, InstrClass.ValuDouble), # takes VCC input
1094 (0x171, 0x171, 0x1e4, 0x1e4, 0x171, 0x239, "v_msad_u8", False, False),
1095 (0x172, 0x172, 0x1e5, 0x1e5, 0x172, 0x23a, "v_qsad_pk_u16_u8", False, False),
1096 (0x172, -1, -1, -1, -1, -1, "v_qsad_u8", False, False), # what's the difference?
1097 (0x173, 0x173, 0x1e6, 0x1e6, 0x173, 0x23b, "v_mqsad_pk_u16_u8", False, False),
1098 (0x173, -1, -1, -1, -1, -1, "v_mqsad_u8", False, False), # what's the difference?
1099 (0x174, 0x174, 0x292, 0x292, 0x174, 0x32f, "v_trig_preop_f64", False, False, InstrClass.ValuDouble),
1100 ( -1, 0x175, 0x1e7, 0x1e7, 0x175, 0x23d, "v_mqsad_u32_u8", False, False),
1101 ( -1, 0x176, 0x1e8, 0x1e8, 0x176, 0x2fe, "v_mad_u64_u32", False, False, InstrClass.Valu64),
1102 ( -1, 0x177, 0x1e9, 0x1e9, 0x177, 0x2ff, "v_mad_i64_i32", False, False, InstrClass.Valu64),
1103 ( -1, -1, 0x1ea, 0x1ea, -1, -1, "v_mad_legacy_f16", True, True),
1104 ( -1, -1, 0x1eb, 0x1eb, -1, -1, "v_mad_legacy_u16", False, False),
1105 ( -1, -1, 0x1ec, 0x1ec, -1, -1, "v_mad_legacy_i16", False, False),
1106 ( -1, -1, 0x1ed, 0x1ed, 0x344, 0x244, "v_perm_b32", False, False),
1107 ( -1, -1, 0x1ee, 0x1ee, -1, -1, "v_fma_legacy_f16", True, True, InstrClass.ValuFma),
1108 ( -1, -1, 0x1ef, 0x1ef, -1, -1, "v_div_fixup_legacy_f16", True, True),
1109 (0x12c, 0x12c, 0x1f0, 0x1f0, -1, -1, "v_cvt_pkaccum_u8_f32", True, False),
1110 ( -1, -1, -1, 0x1f1, 0x373, 0x259, "v_mad_u32_u16", False, False),
1111 ( -1, -1, -1, 0x1f2, 0x375, 0x25a, "v_mad_i32_i16", False, False),
1112 ( -1, -1, -1, 0x1f3, 0x345, 0x245, "v_xad_u32", False, False),
1113 ( -1, -1, -1, 0x1f4, 0x351, 0x249, "v_min3_f16", True, True),
1114 ( -1, -1, -1, 0x1f5, 0x352, 0x24a, "v_min3_i16", False, False),
1115 ( -1, -1, -1, 0x1f6, 0x353, 0x24b, "v_min3_u16", False, False),
1116 ( -1, -1, -1, 0x1f7, 0x354, 0x24c, "v_max3_f16", True, True),
1117 ( -1, -1, -1, 0x1f8, 0x355, 0x24d, "v_max3_i16", False, False),
1118 ( -1, -1, -1, 0x1f9, 0x356, 0x24e, "v_max3_u16", False, False),
1119 ( -1, -1, -1, 0x1fa, 0x357, 0x24f, "v_med3_f16", True, True),
1120 ( -1, -1, -1, 0x1fb, 0x358, 0x250, "v_med3_i16", False, False),
1121 ( -1, -1, -1, 0x1fc, 0x359, 0x251, "v_med3_u16", False, False),
1122 ( -1, -1, -1, 0x1fd, 0x346, 0x246, "v_lshl_add_u32", False, False),
1123 ( -1, -1, -1, 0x1fe, 0x347, 0x247, "v_add_lshl_u32", False, False),
1124 ( -1, -1, -1, 0x1ff, 0x36d, 0x255, "v_add3_u32", False, False),
1125 ( -1, -1, -1, 0x200, 0x36f, 0x256, "v_lshl_or_b32", False, False),
1126 ( -1, -1, -1, 0x201, 0x371, 0x257, "v_and_or_b32", False, False),
1127 ( -1, -1, -1, 0x202, 0x372, 0x258, "v_or3_b32", False, False),
1128 ( -1, -1, -1, 0x203, -1, -1, "v_mad_f16", True, True),
1129 ( -1, -1, -1, 0x204, 0x340, 0x241, "v_mad_u16", False, False),
1130 ( -1, -1, -1, 0x205, 0x35e, 0x253, "v_mad_i16", False, False),
1131 ( -1, -1, -1, 0x206, 0x34b, 0x248, "v_fma_f16", True, True),
1132 ( -1, -1, -1, 0x207, 0x35f, 0x254, "v_div_fixup_f16", True, True),
1133 ( -1, -1, 0x274, 0x274, 0x342, -1, "v_interp_p1ll_f16", True, True),
1134 ( -1, -1, 0x275, 0x275, 0x343, -1, "v_interp_p1lv_f16", True, True),
1135 ( -1, -1, 0x276, 0x276, -1, -1, "v_interp_p2_legacy_f16", True, True),
1136 ( -1, -1, -1, 0x277, 0x35a, -1, "v_interp_p2_f16", True, True),
1137 (0x12b, 0x12b, 0x288, 0x288, 0x362, 0x31c, "v_ldexp_f32", False, True),
1138 ( -1, -1, 0x289, 0x289, 0x360, 0x360, "v_readlane_b32_e64", False, False),
1139 ( -1, -1, 0x28a, 0x28a, 0x361, 0x361, "v_writelane_b32_e64", False, False),
1140 (0x122, 0x122, 0x28b, 0x28b, 0x364, 0x31e, "v_bcnt_u32_b32", False, False),
1141 (0x123, 0x123, 0x28c, 0x28c, 0x365, 0x31f, "v_mbcnt_lo_u32_b32", False, False),
1142 ( -1, -1, 0x28d, 0x28d, 0x366, 0x320, "v_mbcnt_hi_u32_b32_e64", False, False),
1143 ( -1, -1, 0x28f, 0x28f, 0x2ff, 0x33c, "v_lshlrev_b64", False, False, InstrClass.Valu64),
1144 ( -1, -1, 0x290, 0x290, 0x300, 0x33d, "v_lshrrev_b64", False, False, InstrClass.Valu64),
1145 ( -1, -1, 0x291, 0x291, 0x301, 0x33e, "v_ashrrev_i64", False, False, InstrClass.Valu64),
1146 (0x11e, 0x11e, 0x293, 0x293, 0x363, 0x31d, "v_bfm_b32", False, False),
1147 (0x12d, 0x12d, 0x294, 0x294, 0x368, 0x321, "v_cvt_pknorm_i16_f32", True, False),
1148 (0x12e, 0x12e, 0x295, 0x295, 0x369, 0x322, "v_cvt_pknorm_u16_f32", True, False),
1149 ( -1, -1, 0x296, 0x296, -1, -1, "v_cvt_pkrtz_f16_f32_e64", True, False),
1150 (0x130, 0x130, 0x297, 0x297, 0x36a, 0x323, "v_cvt_pk_u16_u32", False, False),
1151 (0x131, 0x131, 0x298, 0x298, 0x36b, 0x324, "v_cvt_pk_i16_i32", False, False),
1152 ( -1, -1, -1, 0x299, 0x312, 0x312, "v_cvt_pknorm_i16_f16", True, False), #v_cvt_pk_norm_i16_f32 in GFX11
1153 ( -1, -1, -1, 0x29a, 0x313, 0x313, "v_cvt_pknorm_u16_f16", True, False), #v_cvt_pk_norm_u16_f32 in GFX11
1154 ( -1, -1, -1, 0x29c, 0x37f, 0x326, "v_add_i32", False, False),
1155 ( -1, -1, -1, 0x29d, 0x376, 0x325, "v_sub_i32", False, False),
1156 ( -1, -1, -1, 0x29e, 0x30d, 0x30d, "v_add_i16", False, False),
1157 ( -1, -1, -1, 0x29f, 0x30e, 0x30e, "v_sub_i16", False, False),
1158 ( -1, -1, -1, 0x2a0, 0x311, 0x311, "v_pack_b32_f16", True, False),
1159 ( -1, -1, -1, -1, 0x178, 0x240, "v_xor3_b32", False, False),
1160 ( -1, -1, -1, -1, 0x377, 0x25b, "v_permlane16_b32", False, False),
1161 ( -1, -1, -1, -1, 0x378, 0x25c, "v_permlanex16_b32", False, False),
1162 ( -1, -1, -1, -1, 0x30f, 0x300, "v_add_co_u32_e64", False, False),
1163 ( -1, -1, -1, -1, 0x310, 0x301, "v_sub_co_u32_e64", False, False),
1164 ( -1, -1, -1, -1, 0x319, 0x302, "v_subrev_co_u32_e64", False, False),
1165 ( -1, -1, -1, -1, 0x303, 0x303, "v_add_u16_e64", False, False),
1166 ( -1, -1, -1, -1, 0x304, 0x304, "v_sub_u16_e64", False, False),
1167 ( -1, -1, -1, -1, 0x305, 0x305, "v_mul_lo_u16_e64", False, False),
1168 ( -1, -1, -1, -1, 0x309, 0x309, "v_max_u16_e64", False, False),
1169 ( -1, -1, -1, -1, 0x30a, 0x30a, "v_max_i16_e64", False, False),
1170 ( -1, -1, -1, -1, 0x30b, 0x30b, "v_min_u16_e64", False, False),
1171 ( -1, -1, -1, -1, 0x30c, 0x30c, "v_min_i16_e64", False, False),
1172 ( -1, -1, -1, -1, 0x307, 0x339, "v_lshrrev_b16_e64", False, False),
1173 ( -1, -1, -1, -1, 0x308, 0x33a, "v_ashrrev_i16_e64", False, False),
1174 ( -1, -1, -1, -1, 0x314, 0x338, "v_lshlrev_b16_e64", False, False),
1175 ( -1, -1, -1, -1, 0x140, 0x209, "v_fma_legacy_f32", True, True, InstrClass.ValuFma), #GFX10.3+, v_fma_dx9_zero_f32 in GFX11
1176 ( -1, -1, -1, -1, -1, 0x25e, "v_maxmin_f32", True, True),
1177 ( -1, -1, -1, -1, -1, 0x25f, "v_minmax_f32", True, True),
1178 ( -1, -1, -1, -1, -1, 0x260, "v_maxmin_f16", True, True),
1179 ( -1, -1, -1, -1, -1, 0x261, "v_minmax_f16", True, True),
1180 ( -1, -1, -1, -1, -1, 0x262, "v_maxmin_u32", False, False),
1181 ( -1, -1, -1, -1, -1, 0x263, "v_minmax_u32", False, False),
1182 ( -1, -1, -1, -1, -1, 0x264, "v_maxmin_i32", False, False),
1183 ( -1, -1, -1, -1, -1, 0x265, "v_minmax_i32", False, False),
1184 ( -1, -1, -1, -1, -1, 0x266, "v_dot2_f16_f16", False, False),
1185 ( -1, -1, -1, -1, -1, 0x267, "v_dot2_bf16_bf16", False, False),
1186 ( -1, -1, -1, -1, -1, 0x306, "v_cvt_pk_i16_f32", True, False),
1187 ( -1, -1, -1, -1, -1, 0x307, "v_cvt_pk_u16_f32", True, False),
1188 ( -1, -1, -1, -1, -1, 0x362, "v_and_b16", False, False),
1189 ( -1, -1, -1, -1, -1, 0x363, "v_or_b16", False, False),
1190 ( -1, -1, -1, -1, -1, 0x364, "v_xor_b16", False, False),
1191 ( -1, -1, -1, -1, -1, 0x25d, "v_cndmask_b16", True, False),
# Register every VOP3 row with Format.VOP3.
# default_class(VOP3, InstrClass.Valu32) yields 10-field rows ending in an
# instruction class; presumably it fills in Valu32 for rows that do not name
# one explicitly -- TODO confirm against default_class().
# The gfx6 and gfx8 columns are unpacked but not forwarded to opcode().
1193 for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, cls) in default_class(VOP3, InstrClass.Valu32):
1194 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP3, cls, in_mod, out_mod)
1197 # DS instructions: 3 inputs (1 addr, 2 data), 1 output
1199 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
1200 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"),
1201 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"),
1202 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"),
1203 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"),
1204 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"),
1205 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"),
1206 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"),
1207 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"),
1208 (0x09, 0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"),
1209 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"),
1210 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"),
1211 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"),
1212 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"), #ds_store_b32 in GFX11
1213 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"), #ds_store_2addr_b32 in GFX11
1214 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"), #ds_store_2addr_stride64_b32 in GFX11
1215 (0x10, 0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"), #ds_cmpstore_b32 in GFX11
1216 (0x11, 0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"), #ds_cmpstore_f32 in GFX11
1217 (0x12, 0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"),
1218 (0x13, 0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"),
1219 ( -1, 0x14, 0x14, 0x14, 0x14, 0x14, "ds_nop"),
1220 ( -1, -1, 0x15, 0x15, 0x15, 0x15, "ds_add_f32"),
1221 ( -1, -1, 0x1d, 0x1d, 0xb0, 0xb0, "ds_write_addtid_b32"), #ds_store_addtid_b32 in GFX11
1222 (0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"), #ds_store_b8 in GFX11
1223 (0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"), #ds_store_b16 in GFX11
1224 (0x20, 0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"),
1225 (0x21, 0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"),
1226 (0x22, 0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"),
1227 (0x23, 0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"),
1228 (0x24, 0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"),
1229 (0x25, 0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"),
1230 (0x26, 0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"),
1231 (0x27, 0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"),
1232 (0x28, 0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"),
1233 (0x29, 0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"),
1234 (0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"),
1235 (0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"),
1236 (0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"),
1237 (0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, "ds_wrxchg_rtn_b32"), #ds_storexchg_rtn_b32 in GFX11
1238 (0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"), #ds_storexchg_2addr_rtn_b32 in GFX11
1239 (0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"), #ds_storexchg_2addr_stride64_rtn_b32 in GFX11
1240 (0x30, 0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"), #ds_cmpstore_rtn_b32 in GFX11
1241 (0x31, 0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"), #ds_cmpstore_rtn_f32 in GFX11
1242 (0x32, 0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"),
1243 (0x33, 0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"),
1244 ( -1, 0x34, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"),
1245 ( -1, -1, 0x35, 0x35, 0x55, 0x79, "ds_add_rtn_f32"),
1246 (0x36, 0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"), #ds_load_b32 in GFX11
1247 (0x37, 0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"), #ds_load_2addr_b32 in GFX11
1248 (0x38, 0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"), #ds_load_2addr_stride64_b32 in GFX11
1249 (0x39, 0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"), #ds_load_i8 in GFX11
1250 (0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"), #ds_load_u8 in GFX11
1251 (0x3b, 0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"), #ds_load_i16 in GFX11
1252 (0x3c, 0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"), #ds_load_u16 in GFX11
1253 (0x35, 0x35, 0x3d, 0x3d, 0x35, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data2
1254 ( -1, -1, 0x3e, 0x3e, 0xb2, 0xb2, "ds_permute_b32"),
1255 ( -1, -1, 0x3f, 0x3f, 0xb3, 0xb3, "ds_bpermute_b32"),
1256 (0x40, 0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"),
1257 (0x41, 0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"),
1258 (0x42, 0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"),
1259 (0x43, 0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"),
1260 (0x44, 0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"),
1261 (0x45, 0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"),
1262 (0x46, 0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"),
1263 (0x47, 0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"),
1264 (0x48, 0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"),
1265 (0x49, 0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"),
1266 (0x4a, 0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"),
1267 (0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"),
1268 (0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"),
1269 (0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"), #ds_store_b64 in GFX11
1270 (0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"), #ds_store_2addr_b64 in GFX11
1271 (0x4f, 0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"), #ds_store_2addr_stride64_b64 in GFX11
1272 (0x50, 0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"), #ds_cmpstore_b64 in GFX11
1273 (0x51, 0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"), #ds_cmpstore_f64 in GFX11
1274 (0x52, 0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"),
1275 (0x53, 0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"),
1276 ( -1, -1, -1, 0x54, 0xa0, 0xa0, "ds_write_b8_d16_hi"), #ds_store_b8_d16_hi in GFX11
1277 ( -1, -1, -1, 0x55, 0xa1, 0xa1, "ds_write_b16_d16_hi"), #ds_store_b16_d16_hi in GFX11
1278 ( -1, -1, -1, 0x56, 0xa2, 0xa2, "ds_read_u8_d16"), #ds_load_u8_d16 in GFX11
1279 ( -1, -1, -1, 0x57, 0xa3, 0xa3, "ds_read_u8_d16_hi"), #ds_load_u8_d16_hi in GFX11
1280 ( -1, -1, -1, 0x58, 0xa4, 0xa4, "ds_read_i8_d16"), #ds_load_i8_d16 in GFX11
1281 ( -1, -1, -1, 0x59, 0xa5, 0xa5, "ds_read_i8_d16_hi"), #ds_load_i8_d16_hi in GFX11
1282 ( -1, -1, -1, 0x5a, 0xa6, 0xa6, "ds_read_u16_d16"), #ds_load_u16_d16 in GFX11
1283 ( -1, -1, -1, 0x5b, 0xa7, 0xa7, "ds_read_u16_d16_hi"), #ds_load_u16_d16_hi in GFX11
1284 (0x60, 0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"),
1285 (0x61, 0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"),
1286 (0x62, 0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"),
1287 (0x63, 0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"),
1288 (0x64, 0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"),
1289 (0x65, 0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"),
1290 (0x66, 0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"),
1291 (0x67, 0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"),
1292 (0x68, 0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"),
1293 (0x69, 0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"),
1294 (0x6a, 0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"),
1295 (0x6b, 0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"),
1296 (0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"),
1297 (0x6d, 0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"), #ds_storexchg_rtn_b64 in GFX11
1298 (0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"), #ds_storexchg_2addr_rtn_b64 in GFX11
1299 (0x6f, 0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"), #ds_storexchg_2addr_stride64_rtn_b64 in GFX11
1300 (0x70, 0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"), #ds_cmpstore_rtn_b64 in GFX11
1301 (0x71, 0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"), #ds_cmpstore_rtn_f64 in GFX11
1302 (0x72, 0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"),
1303 (0x73, 0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"),
1304 (0x76, 0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"), #ds_load_b64 in GFX11
1305 (0x77, 0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"), #ds_load_2addr_b64 in GFX11
1306 (0x78, 0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"), #ds_load_2addr_stride64_b64 in GFX11
1307 ( -1, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"),
1308 (0x80, 0x80, 0x80, 0x80, 0x80, -1, "ds_add_src2_u32"),
1309 (0x81, 0x81, 0x81, 0x81, 0x81, -1, "ds_sub_src2_u32"),
1310 (0x82, 0x82, 0x82, 0x82, 0x82, -1, "ds_rsub_src2_u32"),
1311 (0x83, 0x83, 0x83, 0x83, 0x83, -1, "ds_inc_src2_u32"),
1312 (0x84, 0x84, 0x84, 0x84, 0x84, -1, "ds_dec_src2_u32"),
1313 (0x85, 0x85, 0x85, 0x85, 0x85, -1, "ds_min_src2_i32"),
1314 (0x86, 0x86, 0x86, 0x86, 0x86, -1, "ds_max_src2_i32"),
1315 (0x87, 0x87, 0x87, 0x87, 0x87, -1, "ds_min_src2_u32"),
1316 (0x88, 0x88, 0x88, 0x88, 0x88, -1, "ds_max_src2_u32"),
1317 (0x89, 0x89, 0x89, 0x89, 0x89, -1, "ds_and_src2_b32"),
1318 (0x8a, 0x8a, 0x8a, 0x8a, 0x8a, -1, "ds_or_src2_b32"),
1319 (0x8b, 0x8b, 0x8b, 0x8b, 0x8b, -1, "ds_xor_src2_b32"),
1320 (0x8d, 0x8d, 0x8d, 0x8d, 0x8d, -1, "ds_write_src2_b32"),
1321 (0x92, 0x92, 0x92, 0x92, 0x92, -1, "ds_min_src2_f32"),
1322 (0x93, 0x93, 0x93, 0x93, 0x93, -1, "ds_max_src2_f32"),
1323 ( -1, -1, 0x95, 0x95, 0x95, -1, "ds_add_src2_f32"),
1324 ( -1, 0x18, 0x98, 0x98, 0x18, 0x18, "ds_gws_sema_release_all"),
1325 (0x19, 0x19, 0x99, 0x99, 0x19, 0x19, "ds_gws_init"),
1326 (0x1a, 0x1a, 0x9a, 0x9a, 0x1a, 0x1a, "ds_gws_sema_v"),
1327 (0x1b, 0x1b, 0x9b, 0x9b, 0x1b, 0x1b, "ds_gws_sema_br"),
1328 (0x1c, 0x1c, 0x9c, 0x9c, 0x1c, 0x1c, "ds_gws_sema_p"),
1329 (0x1d, 0x1d, 0x9d, 0x9d, 0x1d, 0x1d, "ds_gws_barrier"),
1330 ( -1, -1, 0xb6, 0xb6, 0xb1, 0xb1, "ds_read_addtid_b32"), #ds_load_addtid_b32 in GFX11
1331 (0x3d, 0x3d, 0xbd, 0xbd, 0x3d, 0x3d, "ds_consume"),
1332 (0x3e, 0x3e, 0xbe, 0xbe, 0x3e, 0x3e, "ds_append"),
1333 (0x3f, 0x3f, 0xbf, 0xbf, 0x3f, 0x3f, "ds_ordered_count"),
1334 (0xc0, 0xc0, 0xc0, 0xc0, 0xc0, -1, "ds_add_src2_u64"),
1335 (0xc1, 0xc1, 0xc1, 0xc1, 0xc1, -1, "ds_sub_src2_u64"),
1336 (0xc2, 0xc2, 0xc2, 0xc2, 0xc2, -1, "ds_rsub_src2_u64"),
1337 (0xc3, 0xc3, 0xc3, 0xc3, 0xc3, -1, "ds_inc_src2_u64"),
1338 (0xc4, 0xc4, 0xc4, 0xc4, 0xc4, -1, "ds_dec_src2_u64"),
1339 (0xc5, 0xc5, 0xc5, 0xc5, 0xc5, -1, "ds_min_src2_i64"),
1340 (0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -1, "ds_max_src2_i64"),
1341 (0xc7, 0xc7, 0xc7, 0xc7, 0xc7, -1, "ds_min_src2_u64"),
1342 (0xc8, 0xc8, 0xc8, 0xc8, 0xc8, -1, "ds_max_src2_u64"),
1343 (0xc9, 0xc9, 0xc9, 0xc9, 0xc9, -1, "ds_and_src2_b64"),
1344 (0xca, 0xca, 0xca, 0xca, 0xca, -1, "ds_or_src2_b64"),
1345 (0xcb, 0xcb, 0xcb, 0xcb, 0xcb, -1, "ds_xor_src2_b64"),
1346 (0xcd, 0xcd, 0xcd, 0xcd, 0xcd, -1, "ds_write_src2_b64"),
1347 (0xd2, 0xd2, 0xd2, 0xd2, 0xd2, -1, "ds_min_src2_f64"),
1348 (0xd3, 0xd3, 0xd3, 0xd3, 0xd3, -1, "ds_max_src2_f64"),
1349 ( -1, 0xde, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"), #ds_store_b96 in GFX11
1350 ( -1, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, "ds_write_b128"), #ds_store_b128 in GFX11
1351 ( -1, 0xfd, 0xfd, -1, -1, -1, "ds_condxchg32_rtn_b128"),
1352 ( -1, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"), #ds_load_b96 in GFX11
1353 ( -1, 0xff, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"), #ds_load_b128 in GFX11
1354 ( -1, -1, -1, -1, -1, 0x7a, "ds_add_gs_reg_rtn"),
1355 ( -1, -1, -1, -1, -1, 0x7b, "ds_sub_gs_reg_rtn"),
# Register every DS row with Format.DS and InstrClass.DS.
# Only the gfx7, gfx9, gfx10 and gfx11 columns are forwarded to opcode();
# gfx6 and gfx8 are unpacked to match the row layout but are otherwise unused.
1357 for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in DS:
1358 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.DS, InstrClass.DS)
1361 # LDSDIR instructions:
1363 (0x00, "lds_param_load"),
1364 (0x01, "lds_direct_load"),
# Register every LDSDIR row with Format.LDSDIR and InstrClass.DS.
# Rows carry a single code, passed in the fourth numeric slot (the one the
# other loops fill with their gfx11 column); the first three slots are -1.
# NOTE(review): -1 presumably marks "no encoding on that generation" -- confirm
# against opcode().
1366 for (code, name) in LDSDIR:
1367 opcode(name, -1, -1, -1, code, Format.LDSDIR, InstrClass.DS)
1369 # MUBUF instructions:
1371 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"),
1372 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"),
1373 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"),
1374 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"),
1375 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"),
1376 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"),
1377 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"),
1378 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"),
1379 ( -1, -1, 0x08, 0x08, 0x80, 0x08, "buffer_load_format_d16_x"),
1380 ( -1, -1, 0x09, 0x09, 0x81, 0x09, "buffer_load_format_d16_xy"),
1381 ( -1, -1, 0x0a, 0x0a, 0x82, 0x0a, "buffer_load_format_d16_xyz"),
1382 ( -1, -1, 0x0b, 0x0b, 0x83, 0x0b, "buffer_load_format_d16_xyzw"),
1383 ( -1, -1, 0x0c, 0x0c, 0x84, 0x0c, "buffer_store_format_d16_x"),
1384 ( -1, -1, 0x0d, 0x0d, 0x85, 0x0d, "buffer_store_format_d16_xy"),
1385 ( -1, -1, 0x0e, 0x0e, 0x86, 0x0e, "buffer_store_format_d16_xyz"),
1386 ( -1, -1, 0x0f, 0x0f, 0x87, 0x0f, "buffer_store_format_d16_xyzw"),
1387 (0x08, 0x08, 0x10, 0x10, 0x08, 0x10, "buffer_load_ubyte"),
1388 (0x09, 0x09, 0x11, 0x11, 0x09, 0x11, "buffer_load_sbyte"),
1389 (0x0a, 0x0a, 0x12, 0x12, 0x0a, 0x12, "buffer_load_ushort"),
1390 (0x0b, 0x0b, 0x13, 0x13, 0x0b, 0x13, "buffer_load_sshort"),
1391 (0x0c, 0x0c, 0x14, 0x14, 0x0c, 0x14, "buffer_load_dword"),
1392 (0x0d, 0x0d, 0x15, 0x15, 0x0d, 0x15, "buffer_load_dwordx2"),
1393 ( -1, 0x0f, 0x16, 0x16, 0x0f, 0x16, "buffer_load_dwordx3"),
1394 (0x0f, 0x0e, 0x17, 0x17, 0x0e, 0x17, "buffer_load_dwordx4"),
1395 (0x18, 0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"),
1396 ( -1, -1, -1, 0x19, 0x19, 0x24, "buffer_store_byte_d16_hi"),
1397 (0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x19, "buffer_store_short"),
1398 ( -1, -1, -1, 0x1b, 0x1b, 0x25, "buffer_store_short_d16_hi"),
1399 (0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1a, "buffer_store_dword"),
1400 (0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1b, "buffer_store_dwordx2"),
1401 ( -1, 0x1f, 0x1e, 0x1e, 0x1f, 0x1c, "buffer_store_dwordx3"),
1402 (0x1e, 0x1e, 0x1f, 0x1f, 0x1e, 0x1d, "buffer_store_dwordx4"),
1403 ( -1, -1, -1, 0x20, 0x20, 0x1e, "buffer_load_ubyte_d16"),
1404 ( -1, -1, -1, 0x21, 0x21, 0x21, "buffer_load_ubyte_d16_hi"),
1405 ( -1, -1, -1, 0x22, 0x22, 0x1f, "buffer_load_sbyte_d16"),
1406 ( -1, -1, -1, 0x23, 0x23, 0x22, "buffer_load_sbyte_d16_hi"),
1407 ( -1, -1, -1, 0x24, 0x24, 0x20, "buffer_load_short_d16"),
1408 ( -1, -1, -1, 0x25, 0x25, 0x23, "buffer_load_short_d16_hi"),
1409 ( -1, -1, -1, 0x26, 0x26, 0x26, "buffer_load_format_d16_hi_x"),
1410 ( -1, -1, -1, 0x27, 0x27, 0x27, "buffer_store_format_d16_hi_x"),
1411 ( -1, -1, 0x3d, 0x3d, -1, -1, "buffer_store_lds_dword"),
1412 (0x71, 0x71, 0x3e, 0x3e, -1, -1, "buffer_wbinvl1"),
1413 (0x70, 0x70, 0x3f, 0x3f, -1, -1, "buffer_wbinvl1_vol"),
1414 (0x30, 0x30, 0x40, 0x40, 0x30, 0x33, "buffer_atomic_swap"),
1415 (0x31, 0x31, 0x41, 0x41, 0x31, 0x34, "buffer_atomic_cmpswap"),
1416 (0x32, 0x32, 0x42, 0x42, 0x32, 0x35, "buffer_atomic_add"),
1417 (0x33, 0x33, 0x43, 0x43, 0x33, 0x36, "buffer_atomic_sub"),
1418 (0x34, -1, -1, -1, -1, -1, "buffer_atomic_rsub"),
1419 (0x35, 0x35, 0x44, 0x44, 0x35, 0x38, "buffer_atomic_smin"),
1420 (0x36, 0x36, 0x45, 0x45, 0x36, 0x39, "buffer_atomic_umin"),
1421 (0x37, 0x37, 0x46, 0x46, 0x37, 0x3a, "buffer_atomic_smax"),
1422 (0x38, 0x38, 0x47, 0x47, 0x38, 0x3b, "buffer_atomic_umax"),
1423 (0x39, 0x39, 0x48, 0x48, 0x39, 0x3c, "buffer_atomic_and"),
1424 (0x3a, 0x3a, 0x49, 0x49, 0x3a, 0x3d, "buffer_atomic_or"),
1425 (0x3b, 0x3b, 0x4a, 0x4a, 0x3b, 0x3e, "buffer_atomic_xor"),
1426 (0x3c, 0x3c, 0x4b, 0x4b, 0x3c, 0x3f, "buffer_atomic_inc"),
1427 (0x3d, 0x3d, 0x4c, 0x4c, 0x3d, 0x40, "buffer_atomic_dec"),
1428 (0x3e, 0x3e, -1, -1, 0x3e, 0x50, "buffer_atomic_fcmpswap"),
1429 (0x3f, 0x3f, -1, -1, 0x3f, 0x51, "buffer_atomic_fmin"),
1430 (0x40, 0x40, -1, -1, 0x40, 0x52, "buffer_atomic_fmax"),
1431 (0x50, 0x50, 0x60, 0x60, 0x50, 0x41, "buffer_atomic_swap_x2"),
1432 (0x51, 0x51, 0x61, 0x61, 0x51, 0x42, "buffer_atomic_cmpswap_x2"),
1433 (0x52, 0x52, 0x62, 0x62, 0x52, 0x43, "buffer_atomic_add_x2"),
1434 (0x53, 0x53, 0x63, 0x63, 0x53, 0x44, "buffer_atomic_sub_x2"),
1435 (0x54, -1, -1, -1, -1, -1, "buffer_atomic_rsub_x2"),
1436 (0x55, 0x55, 0x64, 0x64, 0x55, 0x45, "buffer_atomic_smin_x2"),
1437 (0x56, 0x56, 0x65, 0x65, 0x56, 0x46, "buffer_atomic_umin_x2"),
1438 (0x57, 0x57, 0x66, 0x66, 0x57, 0x47, "buffer_atomic_smax_x2"),
1439 (0x58, 0x58, 0x67, 0x67, 0x58, 0x48, "buffer_atomic_umax_x2"),
1440 (0x59, 0x59, 0x68, 0x68, 0x59, 0x49, "buffer_atomic_and_x2"),
1441 (0x5a, 0x5a, 0x69, 0x69, 0x5a, 0x4a, "buffer_atomic_or_x2"),
1442 (0x5b, 0x5b, 0x6a, 0x6a, 0x5b, 0x4b, "buffer_atomic_xor_x2"),
1443 (0x5c, 0x5c, 0x6b, 0x6b, 0x5c, 0x4c, "buffer_atomic_inc_x2"),
1444 (0x5d, 0x5d, 0x6c, 0x6c, 0x5d, 0x4d, "buffer_atomic_dec_x2"),
1445 (0x5e, 0x5e, -1, -1, 0x5e, -1, "buffer_atomic_fcmpswap_x2"),
1446 (0x5f, 0x5f, -1, -1, 0x5f, -1, "buffer_atomic_fmin_x2"),
1447 (0x60, 0x60, -1, -1, 0x60, -1, "buffer_atomic_fmax_x2"),
1448 ( -1, -1, -1, -1, 0x71, 0x2b, "buffer_gl0_inv"),
1449 ( -1, -1, -1, -1, 0x72, 0x2c, "buffer_gl1_inv"),
1450 ( -1, -1, -1, -1, 0x34, 0x37, "buffer_atomic_csub"), #GFX10.3+. seems glc must be set. buffer_atomic_csub_u32 in GFX11
1451 ( -1, -1, -1, -1, -1, 0x31, "buffer_load_lds_b32"),
1452 ( -1, -1, -1, -1, -1, 0x32, "buffer_load_lds_format_x"),
1453 ( -1, -1, -1, -1, -1, 0x2e, "buffer_load_lds_i8"),
1454 ( -1, -1, -1, -1, -1, 0x30, "buffer_load_lds_i16"),
1455 ( -1, -1, -1, -1, -1, 0x2d, "buffer_load_lds_u8"),
1456 ( -1, -1, -1, -1, -1, 0x2f, "buffer_load_lds_u16"),
1457 ( -1, -1, -1, -1, -1, 0x56, "buffer_atomic_add_f32"),
# Register every MUBUF row with Format.MUBUF and InstrClass.VMem.
# is_atomic is derived from the mnemonic: it is True exactly when the name
# contains the substring "atomic".
# The gfx6 and gfx8 columns are unpacked but not forwarded to opcode().
1459 for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in MUBUF:
1460 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.MUBUF, InstrClass.VMem, is_atomic = "atomic" in name)
1463 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
1464 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"),
1465 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"),
1466 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"),
1467 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"),
1468 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"),
1469 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"),
1470 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"),
1471 ( -1, -1, 0x08, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"),
1472 ( -1, -1, 0x09, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"),
1473 ( -1, -1, 0x0a, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"),
1474 ( -1, -1, 0x0b, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"),
1475 ( -1, -1, 0x0c, 0x0c, 0x0c, 0x0c, "tbuffer_store_format_d16_x"),
1476 ( -1, -1, 0x0d, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"),
1477 ( -1, -1, 0x0e, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"),
1478 ( -1, -1, 0x0f, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"),
# Register every MTBUF row with Format.MTBUF and InstrClass.VMem.
# The gfx6 and gfx8 columns are unpacked but not forwarded to opcode().
1480 for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in MTBUF:
1481 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.MTBUF, InstrClass.VMem)
1485 (0x00, 0x00, "image_load"),
1486 (0x01, 0x01, "image_load_mip"),
1487 (0x02, 0x02, "image_load_pck"),
1488 (0x03, 0x03, "image_load_pck_sgn"),
1489 (0x04, 0x04, "image_load_mip_pck"),
1490 (0x05, 0x05, "image_load_mip_pck_sgn"),
1491 (0x08, 0x06, "image_store"),
1492 (0x09, 0x07, "image_store_mip"),
1493 (0x0a, 0x08, "image_store_pck"),
1494 (0x0b, 0x09, "image_store_mip_pck"),
1495 (0x0e, 0x17, "image_get_resinfo"),
1496 (0x60, 0x38, "image_get_lod"),
1498 # (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, gfx11, name)
# IMAGE rows are (code, gfx11, name): the same `code` is passed for the first
# three numeric slots of opcode() and the row's own gfx11 value for the fourth.
1499 for (code, gfx11, name) in IMAGE:
1500 opcode(name, code, code, code, gfx11, Format.MIMG, InstrClass.VMem)
# image_msaa_load is registered on its own: the first two numeric slots are -1
# and only the last two carry codes (0x80 and 0x18).
1502 opcode("image_msaa_load", -1, -1, 0x80, 0x18, Format.MIMG, InstrClass.VMem) #GFX10.3+
1505 (0x0f, 0x0f, 0x10, 0x0a, "image_atomic_swap"),
1506 (0x10, 0x10, 0x11, 0x0b, "image_atomic_cmpswap"),
1507 (0x11, 0x11, 0x12, 0x0c, "image_atomic_add"),
1508 (0x12, 0x12, 0x13, 0x0d, "image_atomic_sub"),
1509 (0x13, -1, -1, -1, "image_atomic_rsub"),
1510 (0x14, 0x14, 0x14, 0x0e, "image_atomic_smin"),
1511 (0x15, 0x15, 0x15, 0x0f, "image_atomic_umin"),
1512 (0x16, 0x16, 0x16, 0x10, "image_atomic_smax"),
1513 (0x17, 0x17, 0x17, 0x11, "image_atomic_umax"),
1514 (0x18, 0x18, 0x18, 0x12, "image_atomic_and"),
1515 (0x19, 0x19, 0x19, 0x13, "image_atomic_or"),
1516 (0x1a, 0x1a, 0x1a, 0x14, "image_atomic_xor"),
1517 (0x1b, 0x1b, 0x1b, 0x15, "image_atomic_inc"),
1518 (0x1c, 0x1c, 0x1c, 0x16, "image_atomic_dec"),
1519 (0x1d, 0x1d, -1, -1, "image_atomic_fcmpswap"),
1520 (0x1e, 0x1e, -1, -1, "image_atomic_fmin"),
1521 (0x1f, 0x1f, -1, -1, "image_atomic_fmax"),
1523 # (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (gfx6, gfx7, gfx89, gfx89, gfx7, gfx11, name)
1524 # gfx7 and gfx10 opcodes are the same here
# IMAGE_ATOMIC rows are (gfx6, gfx7, gfx89, gfx11, name). The gfx7 column is
# passed twice (first and third numeric slots), gfx89 fills the second slot,
# and gfx6 is unpacked but not forwarded. Every row is flagged is_atomic = True.
1525 for (gfx6, gfx7, gfx89, gfx11, name) in IMAGE_ATOMIC:
1526 opcode(name, gfx7, gfx89, gfx7, gfx11, Format.MIMG, InstrClass.VMem, is_atomic = True)
1529 (0x20, 0x1b, "image_sample"),
1530 (0x21, 0x40, "image_sample_cl"),
1531 (0x22, 0x1c, "image_sample_d"),
1532 (0x23, 0x41, "image_sample_d_cl"),
1533 (0x24, 0x1d, "image_sample_l"),
1534 (0x25, 0x1e, "image_sample_b"),
1535 (0x26, 0x42, "image_sample_b_cl"),
1536 (0x27, 0x1f, "image_sample_lz"),
1537 (0x28, 0x20, "image_sample_c"),
1538 (0x29, 0x43, "image_sample_c_cl"),
1539 (0x2a, 0x21, "image_sample_c_d"),
1540 (0x2b, 0x44, "image_sample_c_d_cl"),
1541 (0x2c, 0x22, "image_sample_c_l"),
1542 (0x2d, 0x23, "image_sample_c_b"),
1543 (0x2e, 0x45, "image_sample_c_b_cl"),
1544 (0x2f, 0x24, "image_sample_c_lz"),
1545 (0x30, 0x25, "image_sample_o"),
1546 (0x31, 0x46, "image_sample_cl_o"),
1547 (0x32, 0x26, "image_sample_d_o"),
1548 (0x33, 0x47, "image_sample_d_cl_o"),
1549 (0x34, 0x27, "image_sample_l_o"),
1550 (0x35, 0x28, "image_sample_b_o"),
1551 (0x36, 0x48, "image_sample_b_cl_o"),
1552 (0x37, 0x29, "image_sample_lz_o"),
1553 (0x38, 0x2a, "image_sample_c_o"),
1554 (0x39, 0x49, "image_sample_c_cl_o"),
1555 (0x3a, 0x2b, "image_sample_c_d_o"),
1556 (0x3b, 0x4a, "image_sample_c_d_cl_o"),
1557 (0x3c, 0x2c, "image_sample_c_l_o"),
1558 (0x3d, 0x2d, "image_sample_c_b_o"),
1559 (0x3e, 0x4b, "image_sample_c_b_cl_o"),
1560 (0x3f, 0x2e, "image_sample_c_lz_o"),
1561 (0x68, -1, "image_sample_cd"),
1562 (0x69, -1, "image_sample_cd_cl"),
1563 (0x6a, -1, "image_sample_c_cd"),
1564 (0x6b, -1, "image_sample_c_cd_cl"),
1565 (0x6c, -1, "image_sample_cd_o"),
1566 (0x6d, -1, "image_sample_cd_cl_o"),
1567 (0x6e, -1, "image_sample_c_cd_o"),
1568 (0x6f, -1, "image_sample_c_cd_cl_o"),
1570 # (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, gfx11, name)
# IMAGE_SAMPLE rows are (code, gfx11, name): `code` is reused for the first
# three numeric slots of opcode(); only the fourth slot takes a separate value.
1571 for (code, gfx11, name) in IMAGE_SAMPLE:
1572 opcode(name, code, code, code, gfx11, Format.MIMG, InstrClass.VMem)
1574 IMAGE_SAMPLE_G16 = {
1575 (0xa2, 0x39, "image_sample_d_g16"),
1576 (0xa3, 0x5f, "image_sample_d_cl_g16"),
1577 (0xaa, 0x3a, "image_sample_c_d_g16"),
1578 (0xab, 0x54, "image_sample_c_d_cl_g16"),
1579 (0xb2, 0x3b, "image_sample_d_o_g16"),
1580 (0xb3, 0x55, "image_sample_d_cl_o_g16"),
1581 (0xba, 0x3c, "image_sample_c_d_o_g16"),
1582 (0xbb, 0x56, "image_sample_c_d_cl_o_g16"),
1585 # (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, -1, -1, code, gfx11, name)
# IMAGE_SAMPLE_G16 rows are (code, gfx11, name): the first two numeric slots of
# opcode() are -1, `code` fills the third slot and gfx11 the fourth.
1586 for (code, gfx11, name) in IMAGE_SAMPLE_G16:
1587 opcode(name, -1, -1, code, gfx11, Format.MIMG, InstrClass.VMem)
1590 (0x40, 0x2f, "image_gather4"),
1591 (0x41, 0x60, "image_gather4_cl"),
1592 #(0x42, "image_gather4h"), VEGA only?
1593 (0x44, 0x30, "image_gather4_l"), # following instructions have different opcodes according to ISA sheet.
1594 (0x45, 0x31, "image_gather4_b"),
1595 (0x46, 0x61, "image_gather4_b_cl"),
1596 (0x47, 0x32, "image_gather4_lz"),
1597 (0x48, 0x33, "image_gather4_c"),
1598 (0x49, 0x62, "image_gather4_c_cl"), # previous instructions have different opcodes according to ISA sheet.
1599 #(0x4a, "image_gather4h_pck"), VEGA only?
1600 #(0x4b, "image_gather8h_pck"), VEGA only?
1601 (0x4c, 0x63, "image_gather4_c_l"),
1602 (0x4d, 0x64, "image_gather4_c_b"),
1603 (0x4e, 0x65, "image_gather4_c_b_cl"),
1604 (0x4f, 0x34, "image_gather4_c_lz"),
1605 (0x50, 0x35, "image_gather4_o"),
1606 (0x51, -1, "image_gather4_cl_o"),
1607 (0x54, -1, "image_gather4_l_o"),
1608 (0x55, -1, "image_gather4_b_o"),
1609 (0x56, -1, "image_gather4_b_cl_o"),
1610 (0x57, 0x36, "image_gather4_lz_o"),
1611 (0x58, -1, "image_gather4_c_o"),
1612 (0x59, -1, "image_gather4_c_cl_o"),
1613 (0x5c, -1, "image_gather4_c_l_o"),
1614 (0x5d, -1, "image_gather4_c_b_o"),
1615 (0x5e, -1, "image_gather4_c_b_cl_o"),
1616 (0x5f, 0x37, "image_gather4_c_lz_o"),
1618 # (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, gfx11, name)
# IMAGE_GATHER4 rows are (code, gfx11, name): `code` is reused for the first
# three numeric slots of opcode(); only the fourth slot takes a separate value.
1619 for (code, gfx11, name) in IMAGE_GATHER4:
1620 opcode(name, code, code, code, gfx11, Format.MIMG, InstrClass.VMem)
# The two BVH ray-intersection opcodes are registered individually: the first
# two numeric slots are -1 and only the last two carry codes.
1622 opcode("image_bvh_intersect_ray", -1, -1, 0xe6, 0x19, Format.MIMG, InstrClass.VMem)
1623 opcode("image_bvh64_intersect_ray", -1, -1, 0xe7, 0x1a, Format.MIMG, InstrClass.VMem)
1626 #GFX7, GFX89,GFX10,GFX11
1627 (0x08, 0x10, 0x08, 0x10, "flat_load_ubyte"),
1628 (0x09, 0x11, 0x09, 0x11, "flat_load_sbyte"),
1629 (0x0a, 0x12, 0x0a, 0x12, "flat_load_ushort"),
1630 (0x0b, 0x13, 0x0b, 0x13, "flat_load_sshort"),
1631 (0x0c, 0x14, 0x0c, 0x14, "flat_load_dword"),
1632 (0x0d, 0x15, 0x0d, 0x15, "flat_load_dwordx2"),
1633 (0x0f, 0x16, 0x0f, 0x16, "flat_load_dwordx3"),
1634 (0x0e, 0x17, 0x0e, 0x17, "flat_load_dwordx4"),
1635 (0x18, 0x18, 0x18, 0x18, "flat_store_byte"),
1636 ( -1, 0x19, 0x19, 0x24, "flat_store_byte_d16_hi"),
1637 (0x1a, 0x1a, 0x1a, 0x19, "flat_store_short"),
1638 ( -1, 0x1b, 0x1b, 0x25, "flat_store_short_d16_hi"),
1639 (0x1c, 0x1c, 0x1c, 0x1a, "flat_store_dword"),
1640 (0x1d, 0x1d, 0x1d, 0x1b, "flat_store_dwordx2"),
1641 (0x1f, 0x1e, 0x1f, 0x1c, "flat_store_dwordx3"),
1642 (0x1e, 0x1f, 0x1e, 0x1d, "flat_store_dwordx4"),
1643 ( -1, 0x20, 0x20, 0x1e, "flat_load_ubyte_d16"),
1644 ( -1, 0x21, 0x21, 0x21, "flat_load_ubyte_d16_hi"),
1645 ( -1, 0x22, 0x22, 0x1f, "flat_load_sbyte_d16"),
1646 ( -1, 0x23, 0x23, 0x22, "flat_load_sbyte_d16_hi"),
1647 ( -1, 0x24, 0x24, 0x20, "flat_load_short_d16"),
1648 ( -1, 0x25, 0x25, 0x23, "flat_load_short_d16_hi"),
1649 (0x30, 0x40, 0x30, 0x33, "flat_atomic_swap"),
1650 (0x31, 0x41, 0x31, 0x34, "flat_atomic_cmpswap"),
1651 (0x32, 0x42, 0x32, 0x35, "flat_atomic_add"),
1652 (0x33, 0x43, 0x33, 0x36, "flat_atomic_sub"),
1653 (0x35, 0x44, 0x35, 0x38, "flat_atomic_smin"),
1654 (0x36, 0x45, 0x36, 0x39, "flat_atomic_umin"),
1655 (0x37, 0x46, 0x37, 0x3a, "flat_atomic_smax"),
1656 (0x38, 0x47, 0x38, 0x3b, "flat_atomic_umax"),
1657 (0x39, 0x48, 0x39, 0x3c, "flat_atomic_and"),
1658 (0x3a, 0x49, 0x3a, 0x3d, "flat_atomic_or"),
1659 (0x3b, 0x4a, 0x3b, 0x3e, "flat_atomic_xor"),
1660 (0x3c, 0x4b, 0x3c, 0x3f, "flat_atomic_inc"),
1661 (0x3d, 0x4c, 0x3d, 0x40, "flat_atomic_dec"),
1662 (0x3e, -1, 0x3e, 0x50, "flat_atomic_fcmpswap"),
1663 (0x3f, -1, 0x3f, 0x51, "flat_atomic_fmin"),
1664 (0x40, -1, 0x40, 0x52, "flat_atomic_fmax"),
1665 (0x50, 0x60, 0x50, 0x41, "flat_atomic_swap_x2"),
1666 (0x51, 0x61, 0x51, 0x42, "flat_atomic_cmpswap_x2"),
1667 (0x52, 0x62, 0x52, 0x43, "flat_atomic_add_x2"),
1668 (0x53, 0x63, 0x53, 0x44, "flat_atomic_sub_x2"),
1669 (0x55, 0x64, 0x55, 0x45, "flat_atomic_smin_x2"),
1670 (0x56, 0x65, 0x56, 0x46, "flat_atomic_umin_x2"),
1671 (0x57, 0x66, 0x57, 0x47, "flat_atomic_smax_x2"),
1672 (0x58, 0x67, 0x58, 0x48, "flat_atomic_umax_x2"),
1673 (0x59, 0x68, 0x59, 0x49, "flat_atomic_and_x2"),
1674 (0x5a, 0x69, 0x5a, 0x4a, "flat_atomic_or_x2"),
1675 (0x5b, 0x6a, 0x5b, 0x4b, "flat_atomic_xor_x2"),
1676 (0x5c, 0x6b, 0x5c, 0x4c, "flat_atomic_inc_x2"),
1677 (0x5d, 0x6c, 0x5d, 0x4d, "flat_atomic_dec_x2"),
1678 (0x5e, -1, 0x5e, -1, "flat_atomic_fcmpswap_x2"),
1679 (0x5f, -1, 0x5f, -1, "flat_atomic_fmin_x2"),
1680 (0x60, -1, 0x60, -1, "flat_atomic_fmax_x2"),
1681 ( -1, -1, -1, 0x56, "flat_atomic_add_f32"),
# Register every FLAT row with Format.FLAT and InstrClass.VMem.
# Rows have four code columns (the table's header comment labels them
# GFX7, GFX89, GFX10, GFX11). The second one is bound to `gfx8` here and passed
# in the slot that the six-column loops fill with their gfx9 value.
# is_atomic is True exactly when the name contains the substring "atomic".
1683 for (gfx7, gfx8, gfx10, gfx11, name) in FLAT:
1684 opcode(name, gfx7, gfx8, gfx10, gfx11, Format.FLAT, InstrClass.VMem, is_atomic = "atomic" in name) #TODO: also LDS?
1688 (0x10, 0x08, 0x10, "global_load_ubyte"),
1689 (0x11, 0x09, 0x11, "global_load_sbyte"),
1690 (0x12, 0x0a, 0x12, "global_load_ushort"),
1691 (0x13, 0x0b, 0x13, "global_load_sshort"),
1692 (0x14, 0x0c, 0x14, "global_load_dword"),
1693 (0x15, 0x0d, 0x15, "global_load_dwordx2"),
1694 (0x16, 0x0f, 0x16, "global_load_dwordx3"),
1695 (0x17, 0x0e, 0x17, "global_load_dwordx4"),
1696 (0x18, 0x18, 0x18, "global_store_byte"),
1697 (0x19, 0x19, 0x24, "global_store_byte_d16_hi"),
1698 (0x1a, 0x1a, 0x19, "global_store_short"),
1699 (0x1b, 0x1b, 0x25, "global_store_short_d16_hi"),
1700 (0x1c, 0x1c, 0x1a, "global_store_dword"),
1701 (0x1d, 0x1d, 0x1b, "global_store_dwordx2"),
1702 (0x1e, 0x1f, 0x1c, "global_store_dwordx3"),
1703 (0x1f, 0x1e, 0x1d, "global_store_dwordx4"),
1704 (0x20, 0x20, 0x1e, "global_load_ubyte_d16"),
1705 (0x21, 0x21, 0x21, "global_load_ubyte_d16_hi"),
1706 (0x22, 0x22, 0x1f, "global_load_sbyte_d16"),
1707 (0x23, 0x23, 0x22, "global_load_sbyte_d16_hi"),
1708 (0x24, 0x24, 0x20, "global_load_short_d16"),
1709 (0x25, 0x25, 0x23, "global_load_short_d16_hi"),
1710 (0x40, 0x30, 0x33, "global_atomic_swap"),
1711 (0x41, 0x31, 0x34, "global_atomic_cmpswap"),
1712 (0x42, 0x32, 0x35, "global_atomic_add"),
1713 (0x43, 0x33, 0x36, "global_atomic_sub"),
1714 (0x44, 0x35, 0x38, "global_atomic_smin"),
1715 (0x45, 0x36, 0x39, "global_atomic_umin"),
1716 (0x46, 0x37, 0x3a, "global_atomic_smax"),
1717 (0x47, 0x38, 0x3b, "global_atomic_umax"),
1718 (0x48, 0x39, 0x3c, "global_atomic_and"),
1719 (0x49, 0x3a, 0x3d, "global_atomic_or"),
1720 (0x4a, 0x3b, 0x3e, "global_atomic_xor"),
1721 (0x4b, 0x3c, 0x3f, "global_atomic_inc"),
1722 (0x4c, 0x3d, 0x40, "global_atomic_dec"),
1723 ( -1, 0x3e, 0x50, "global_atomic_fcmpswap"),
1724 ( -1, 0x3f, 0x51, "global_atomic_fmin"),
1725 ( -1, 0x40, 0x52, "global_atomic_fmax"),
1726 (0x60, 0x50, 0x41, "global_atomic_swap_x2"),
1727 (0x61, 0x51, 0x42, "global_atomic_cmpswap_x2"),
1728 (0x62, 0x52, 0x43, "global_atomic_add_x2"),
1729 (0x63, 0x53, 0x44, "global_atomic_sub_x2"),
1730 (0x64, 0x55, 0x45, "global_atomic_smin_x2"),
1731 (0x65, 0x56, 0x46, "global_atomic_umin_x2"),
1732 (0x66, 0x57, 0x47, "global_atomic_smax_x2"),
1733 (0x67, 0x58, 0x48, "global_atomic_umax_x2"),
1734 (0x68, 0x59, 0x49, "global_atomic_and_x2"),
1735 (0x69, 0x5a, 0x4a, "global_atomic_or_x2"),
1736 (0x6a, 0x5b, 0x4b, "global_atomic_xor_x2"),
1737 (0x6b, 0x5c, 0x4c, "global_atomic_inc_x2"),
1738 (0x6c, 0x5d, 0x4d, "global_atomic_dec_x2"),
1739 ( -1, 0x5e, -1, "global_atomic_fcmpswap_x2"),
1740 ( -1, 0x5f, -1, "global_atomic_fmin_x2"),
1741 ( -1, 0x60, -1, "global_atomic_fmax_x2"),
1742 ( -1, 0x16, 0x28, "global_load_dword_addtid"), #GFX10.3+
1743 ( -1, 0x17, 0x29, "global_store_dword_addtid"), #GFX10.3+
1744 ( -1, 0x34, 0x37, "global_atomic_csub"), #GFX10.3+. seems glc must be set
1745 ( -1, -1, 0x56, "global_atomic_add_f32"),
# Register every GLOBAL-format instruction.
#
# Each GLOBAL row unpacks as (gfx8, gfx10, gfx11, name) and the three numbers
# are forwarded to opcode() in that order, after a constant -1 second
# argument. An instruction is flagged atomic exactly when its mnemonic
# contains the substring "atomic".
for (gfx8, gfx10, gfx11, name) in GLOBAL:
    opcode(name, -1, gfx8, gfx10, gfx11, Format.GLOBAL, InstrClass.VMem, is_atomic = "atomic" in name)
# SCRATCH instructions.
#
# One row per instruction: three opcode numbers followed by the mnemonic.
# The registration loop that consumes this table unpacks each row as
# (gfx8, gfx10, gfx11, name).
SCRATCH = {
    # gfx8, gfx10, gfx11, name
    (0x10, 0x08, 0x10, "scratch_load_ubyte"),
    (0x11, 0x09, 0x11, "scratch_load_sbyte"),
    (0x12, 0x0a, 0x12, "scratch_load_ushort"),
    (0x13, 0x0b, 0x13, "scratch_load_sshort"),
    (0x14, 0x0c, 0x14, "scratch_load_dword"),
    (0x15, 0x0d, 0x15, "scratch_load_dwordx2"),
    (0x16, 0x0f, 0x16, "scratch_load_dwordx3"),
    (0x17, 0x0e, 0x17, "scratch_load_dwordx4"),
    (0x18, 0x18, 0x18, "scratch_store_byte"),
    (0x19, 0x19, 0x24, "scratch_store_byte_d16_hi"),
    (0x1a, 0x1a, 0x19, "scratch_store_short"),
    (0x1b, 0x1b, 0x25, "scratch_store_short_d16_hi"),
    (0x1c, 0x1c, 0x1a, "scratch_store_dword"),
    (0x1d, 0x1d, 0x1b, "scratch_store_dwordx2"),
    (0x1e, 0x1f, 0x1c, "scratch_store_dwordx3"),
    (0x1f, 0x1e, 0x1d, "scratch_store_dwordx4"),
    (0x20, 0x20, 0x1e, "scratch_load_ubyte_d16"),
    (0x21, 0x21, 0x21, "scratch_load_ubyte_d16_hi"),
    (0x22, 0x22, 0x1f, "scratch_load_sbyte_d16"),
    (0x23, 0x23, 0x22, "scratch_load_sbyte_d16_hi"),
    (0x24, 0x24, 0x20, "scratch_load_short_d16"),
    (0x25, 0x25, 0x23, "scratch_load_short_d16_hi"),
}
# Register every SCRATCH-format instruction.
#
# Each SCRATCH row unpacks as (gfx8, gfx10, gfx11, name) and the three numbers
# are forwarded to opcode() in that order, after a constant -1 second
# argument.
for (gfx8, gfx10, gfx11, name) in SCRATCH:
    opcode(name, -1, gfx8, gfx10, gfx11, Format.SCRATCH, InstrClass.VMem)
# check for duplicate opcode numbers
#
# For each hardware generation listed below, every non-pseudo instruction
# that has an opcode number must own a unique (format, number) pair. A few
# known name pairs are allowed to share a number; any other clash is printed
# and aborts the script with a non-zero exit status.
for ver in ['gfx9', 'gfx10', 'gfx11']:
    # (format, opcode number) -> name of the first instruction seen with that
    # pair. Reset per generation, because numbers are only compared within
    # one generation.
    op_to_name = {}
    for op in opcodes.values():
        # Pseudo formats are excluded from the check.
        if op.format in [Format.PSEUDO, Format.PSEUDO_BRANCH, Format.PSEUDO_BARRIER, Format.PSEUDO_REDUCTION]:
            continue

        num = getattr(op, 'opcode_' + ver)
        # Several instructions of one format carry -1 for the same generation
        # in the opcode tables (presumably "no encoding on that generation"),
        # so -1 must never be counted as a clash.
        if num == -1:
            continue

        key = (op.format, num)

        if key in op_to_name:
            # exceptions
            names = set([op_to_name[key], op.name])
            # v_mul_lo_i32 and v_mul_lo_u32 may share one opcode number.
            # NOTE(review): 'gfx8' is not among the versions iterated above,
            # so only 'gfx9' and 'gfx11' can match here.
            if ver in ['gfx8', 'gfx9', 'gfx11'] and names == set(['v_mul_lo_i32', 'v_mul_lo_u32']):
                continue
            # v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
            if ver == 'gfx10' and names == set(['v_mad_legacy_f32', 'v_fma_legacy_f32']):
                continue
            # v_mac_legacy_f32 is replaced with v_fmac_legacy_f32 on GFX10.3
            if ver == 'gfx10' and names == set(['v_mac_legacy_f32', 'v_fmac_legacy_f32']):
                continue

            print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))
            # Abort: a real clash means the opcode tables are wrong.
            raise SystemExit(1)
        else:
            op_to_name[key] = op.name