2 # Copyright (c) 2018 Valve Corporation
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the "Software"),
6 # to deal in the Software without restriction, including without limitation
7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 # and/or sell copies of the Software, and to permit persons to whom the
9 # Software is furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice (including the next
12 # paragraph) shall be included in all copies or substantial portions of the
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 # Class that represents all the information we have about the opcode
25 # NOTE: this must be kept in sync with aco_op_info
30 class InstrClass(Enum):
36 ValuTranscendental32 = 5
40 ValuDoubleTranscendental = 9
def get_builder_fields(self):
    """Return the format-specific builder fields of this instruction format.

    Each entry is a tuple ``(type, name, default)`` or
    ``(type, name, default, dest)``:

    - ``type`` and ``name`` are strings; get_builder_field_decls() joins
      them into a declaration,
    - ``default`` is the default value, or None when the field has none,
    - ``dest``, when present, is what get_builder_field_dests() reports
      instead of ``name``.
    """
    if self == Format.SOPK:
        return [('uint16_t', 'imm', None)]
    elif self == Format.SOPP:
        return [('uint32_t', 'block', '-1'),
                ('uint32_t', 'imm', '0')]
    elif self == Format.SMEM:
        return [('memory_sync_info', 'sync', 'memory_sync_info()'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'nv', 'false')]
    elif self == Format.DS:
        return [('uint16_t', 'offset0', '0'),
                ('uint8_t', 'offset1', '0'),
                ('bool', 'gds', 'false')]
    elif self == Format.LDSDIR:
        return [('uint8_t', 'attr', 0),
                ('uint8_t', 'attr_chan', 0),
                ('memory_sync_info', 'sync', 'memory_sync_info()'),
                ('uint8_t', 'wait_vdst', 15)]
    elif self == Format.MTBUF:
        return [('unsigned', 'dfmt', None),
                ('unsigned', 'nfmt', None),
                ('unsigned', 'offset', None),
                ('bool', 'offen', None),
                ('bool', 'idxen', 'false'),
                ('bool', 'disable_wqm', 'false'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'tfe', 'false')]
    elif self == Format.MUBUF:
        return [('unsigned', 'offset', None),
                ('bool', 'offen', None),
                ('bool', 'swizzled', 'false'),
                ('bool', 'idxen', 'false'),
                ('bool', 'addr64', 'false'),
                ('bool', 'disable_wqm', 'false'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'tfe', 'false'),
                ('bool', 'lds', 'false')]
    elif self == Format.MIMG:
        return [('unsigned', 'dmask', '0xF'),
                ('bool', 'da', 'false'),
                ('bool', 'unrm', 'false'),
                ('bool', 'disable_wqm', 'false'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'tfe', 'false'),
                ('bool', 'lwe', 'false'),
                ('bool', 'r128', 'false'),
                ('bool', 'a16', 'false'),
                ('bool', 'd16', 'false')]
    elif self == Format.EXP:
        return [('unsigned', 'enabled_mask', None),
                ('unsigned', 'dest', None),
                ('bool', 'compr', 'false', 'compressed'),
                ('bool', 'done', 'false'),
                ('bool', 'vm', 'false', 'valid_mask')]
    elif self == Format.PSEUDO_BRANCH:
        return [('uint32_t', 'target0', '0', 'target[0]'),
                ('uint32_t', 'target1', '0', 'target[1]')]
    elif self == Format.PSEUDO_REDUCTION:
        return [('ReduceOp', 'op', None, 'reduce_op'),
                ('unsigned', 'cluster_size', '0')]
    elif self == Format.PSEUDO_BARRIER:
        return [('memory_sync_info', 'sync', None),
                ('sync_scope', 'exec_scope', 'scope_invocation')]
    elif self == Format.VINTRP:
        return [('unsigned', 'attribute', None),
                ('unsigned', 'component', None)]
    elif self == Format.DPP16:
        return [('uint16_t', 'dpp_ctrl', None),
                ('uint8_t', 'row_mask', '0xF'),
                ('uint8_t', 'bank_mask', '0xF'),
                ('bool', 'bound_ctrl', 'true')]
    elif self == Format.VOP3P:
        return [('uint8_t', 'opsel_lo', None),
                ('uint8_t', 'opsel_hi', None)]
    elif self == Format.VINTERP_INREG:
        return [('unsigned', 'wait_exp', 7),
                ('uint8_t', 'opsel', 0)]
    elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
        return [('int16_t', 'offset', 0),
                ('memory_sync_info', 'sync', 'memory_sync_info()'),
                ('bool', 'glc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'lds', 'false'),
                ('bool', 'nv', 'false')]
def get_builder_field_names(self):
    """Return the name (second tuple element) of every builder field."""
    names = []
    for field in self.get_builder_fields():
        names.append(field[1])
    return names
def get_builder_field_dests(self):
    """Return the destination name of every builder field.

    A field tuple with at least four elements contributes its fourth
    element; any shorter tuple contributes its own name (second element).
    """
    dests = []
    for field in self.get_builder_fields():
        if len(field) >= 4:
            dests.append(field[3])
        else:
            dests.append(field[1])
    return dests
def get_builder_field_decls(self):
    """Return one declaration string per builder field.

    A field with a default is rendered as ``"<type> <name>=<default>"``;
    a field whose default is None is rendered as ``"<type> <name>"``.
    """
    decls = []
    for field in self.get_builder_fields():
        ctype, name, default = field[0], field[1], field[2]
        # Identity test against None: falsy defaults such as 0 are real
        # defaults and must still be emitted.
        if default is not None:
            decls.append('%s %s=%s' % (ctype, name, default))
        else:
            decls.append('%s %s' % (ctype, name))
    return decls
189 def get_builder_initialization(self, num_operands):
191 if self == Format.SDWA:
192 for i in range(min(num_operands, 2)):
193 res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
194 res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
198 class Opcode(object):
199 """Class that represents all the information we have about the opcode
200 NOTE: this must be kept in sync with aco_op_info
202 def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, opcode_gfx11, format, input_mod, output_mod, is_atomic, cls):
- name is the name of the opcode (a string)
- opcode_gfx7, opcode_gfx9, opcode_gfx10 and opcode_gfx11 are the integer
  encodings of the opcode for the respective hardware generation; the
  opcode tables in this file use -1 where an instruction has no encoding
- format is the Format of the instruction
- input_mod and output_mod are booleans; they are stored as the strings
  "1" or "0"
- is_atomic is stored as "1" when truthy and "0" otherwise
- cls is the instruction class, e.g. a member of InstrClass
213 assert isinstance(name, str)
214 assert isinstance(opcode_gfx7, int)
215 assert isinstance(opcode_gfx9, int)
216 assert isinstance(opcode_gfx10, int)
217 assert isinstance(opcode_gfx11, int)
218 assert isinstance(format, Format)
219 assert isinstance(input_mod, bool)
220 assert isinstance(output_mod, bool)
223 self.opcode_gfx7 = opcode_gfx7
224 self.opcode_gfx9 = opcode_gfx9
225 self.opcode_gfx10 = opcode_gfx10
226 self.opcode_gfx11 = opcode_gfx11
227 self.input_mod = "1" if input_mod else "0"
228 self.output_mod = "1" if output_mod else "0"
229 self.is_atomic = "1" if is_atomic else "0"
233 parts = name.replace('_e64', '').rsplit('_', 2)
236 op_dtype_sizes = {'{}{}'.format(prefix, size) : size for prefix in 'biuf' for size in [64, 32, 24, 16]}
237 # inline constants are 32-bit for 16-bit integer/typeless instructions: https://reviews.llvm.org/D81841
238 op_dtype_sizes['b16'] = 32
239 op_dtype_sizes['i16'] = 32
240 op_dtype_sizes['u16'] = 32
242 # If we can't tell the operand size, default to 32.
243 self.operand_size = op_dtype_sizes.get(op_dtype, 32)
245 # exceptions for operands:
247 self.operand_size = 0
249 self.operand_size = 32
250 elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
251 self.operand_size = 0
252 elif self.operand_size == 24:
253 self.operand_size = 32
254 elif op_dtype == 'u8' or op_dtype == 'i8':
255 self.operand_size = 32
256 elif name in ['v_cvt_f32_ubyte0', 'v_cvt_f32_ubyte1',
257 'v_cvt_f32_ubyte2', 'v_cvt_f32_ubyte3']:
258 self.operand_size = 32
260 # global dictionary of opcodes
def opcode(name, opcode_gfx7=-1, opcode_gfx9=-1, opcode_gfx10=-1,
           opcode_gfx11=-1, format=Format.PSEUDO, cls=InstrClass.Other,
           input_mod=False, output_mod=False, is_atomic=False):
    """Build an Opcode from the arguments and register it under ``name``.

    The new object is stored in the global ``opcodes`` dictionary.
    Registering a name that is already present trips the assertion.
    """
    assert name not in opcodes
    opcodes[name] = Opcode(
        name=name,
        opcode_gfx7=opcode_gfx7,
        opcode_gfx9=opcode_gfx9,
        opcode_gfx10=opcode_gfx10,
        opcode_gfx11=opcode_gfx11,
        format=format,
        input_mod=input_mod,
        output_mod=output_mod,
        is_atomic=is_atomic,
        cls=cls,
    )
267 def default_class(opcodes, cls):
269 if isinstance(op[-1], InstrClass):
274 opcode("exp", 0, 0, 0, 0, format = Format.EXP, cls = InstrClass.Export)
275 opcode("p_parallelcopy")
279 opcode("p_linear_phi")
280 opcode("p_as_uniform")
281 opcode("p_unit_test")
283 opcode("p_create_vector")
284 opcode("p_extract_vector")
285 opcode("p_split_vector")
287 # start/end the parts where we can use exec based instructions
289 opcode("p_logical_start")
290 opcode("p_logical_end")
292 # e.g. subgroupMin() in SPIR-V
293 opcode("p_reduce", format=Format.PSEUDO_REDUCTION)
294 # e.g. subgroupInclusiveMin()
295 opcode("p_inclusive_scan", format=Format.PSEUDO_REDUCTION)
296 # e.g. subgroupExclusiveMin()
297 opcode("p_exclusive_scan", format=Format.PSEUDO_REDUCTION)
299 opcode("p_branch", format=Format.PSEUDO_BRANCH)
300 opcode("p_cbranch", format=Format.PSEUDO_BRANCH)
301 opcode("p_cbranch_z", format=Format.PSEUDO_BRANCH)
302 opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH)
304 opcode("p_barrier", format=Format.PSEUDO_BARRIER)
309 # Start/end linear vgprs. p_start_linear_vgpr can take an operand to copy from, into the linear vgpr
310 opcode("p_start_linear_vgpr")
311 opcode("p_end_linear_vgpr")
314 opcode("p_discard_if")
315 opcode("p_demote_to_helper")
316 opcode("p_is_helper")
317 opcode("p_exit_early_if")
319 # simulates proper bpermute behavior on GFX6
320 # definitions: result VGPR, temp EXEC, clobbered VCC
321 # operands: index, input data
322 opcode("p_bpermute_gfx6")
324 # simulates proper bpermute behavior on GFX10
325 # definitions: result VGPR, temp EXEC, clobbered SCC
326 # operands: index * 4, input data, same half (bool)
327 opcode("p_bpermute_gfx10w64")
329 # simulates proper bpermute behavior on GFX11
330 # definitions: result VGPR, temp EXEC, clobbered SCC
331 # operands: linear VGPR, index * 4, input data, same half (bool)
332 opcode("p_bpermute_gfx11w64")
334 # creates a lane mask where only the first active lane is selected
337 opcode("p_constaddr")
338 opcode("p_resume_shader_address")
340 # These don't have to be pseudo-ops, but it makes optimization easier to only
341 # have to consider two instructions.
342 # (src0 >> (index * bits)) & ((1 << bits) - 1) with optional sign extension
343 opcode("p_extract") # src1=index, src2=bits, src3=signext
344 # (src0 & ((1 << bits) - 1)) << (index * bits)
345 opcode("p_insert") # src1=index, src2=bits
347 opcode("p_init_scratch")
349 # jumps to a shader epilog
350 opcode("p_jump_to_epilog")
352 # loads and interpolates a fragment shader input with a correct exec mask
353 #dst0=result, src0=linear_vgpr, src1=attribute, src2=component, src3=coord1, src4=coord2, src5=m0
354 #dst0=result, src0=linear_vgpr, src1=attribute, src2=component, src3=dpp_ctrl, src4=m0
355 opcode("p_interp_gfx11")
357 # performs dual source MRTs swizzling and emits exports on GFX11
358 opcode("p_dual_src_export_gfx11")
360 # SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
362 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
363 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32"),
364 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32"),
365 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32"),
366 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32"),
367 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32"),
368 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32"),
369 (0x06, 0x06, 0x06, 0x06, 0x06, 0x12, "s_min_i32"),
370 (0x07, 0x07, 0x07, 0x07, 0x07, 0x13, "s_min_u32"),
371 (0x08, 0x08, 0x08, 0x08, 0x08, 0x14, "s_max_i32"),
372 (0x09, 0x09, 0x09, 0x09, 0x09, 0x15, "s_max_u32"),
373 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x30, "s_cselect_b32"),
374 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x31, "s_cselect_b64"),
375 (0x0e, 0x0e, 0x0c, 0x0c, 0x0e, 0x16, "s_and_b32"),
376 (0x0f, 0x0f, 0x0d, 0x0d, 0x0f, 0x17, "s_and_b64"),
377 (0x10, 0x10, 0x0e, 0x0e, 0x10, 0x18, "s_or_b32"),
378 (0x11, 0x11, 0x0f, 0x0f, 0x11, 0x19, "s_or_b64"),
379 (0x12, 0x12, 0x10, 0x10, 0x12, 0x1a, "s_xor_b32"),
380 (0x13, 0x13, 0x11, 0x11, 0x13, 0x1b, "s_xor_b64"),
381 (0x14, 0x14, 0x12, 0x12, 0x14, 0x22, "s_andn2_b32"), #s_and_not1_b32 in GFX11
382 (0x15, 0x15, 0x13, 0x13, 0x15, 0x23, "s_andn2_b64"), #s_and_not1_b64 in GFX11
383 (0x16, 0x16, 0x14, 0x14, 0x16, 0x24, "s_orn2_b32"), #s_or_not1_b32 in GFX11
384 (0x17, 0x17, 0x15, 0x15, 0x17, 0x25, "s_orn2_b64"), #s_or_not1_b64 in GFX11
385 (0x18, 0x18, 0x16, 0x16, 0x18, 0x1c, "s_nand_b32"),
386 (0x19, 0x19, 0x17, 0x17, 0x19, 0x1d, "s_nand_b64"),
387 (0x1a, 0x1a, 0x18, 0x18, 0x1a, 0x1e, "s_nor_b32"),
388 (0x1b, 0x1b, 0x19, 0x19, 0x1b, 0x1f, "s_nor_b64"),
389 (0x1c, 0x1c, 0x1a, 0x1a, 0x1c, 0x20, "s_xnor_b32"),
390 (0x1d, 0x1d, 0x1b, 0x1b, 0x1d, 0x21, "s_xnor_b64"),
391 (0x1e, 0x1e, 0x1c, 0x1c, 0x1e, 0x08, "s_lshl_b32"),
392 (0x1f, 0x1f, 0x1d, 0x1d, 0x1f, 0x09, "s_lshl_b64"),
393 (0x20, 0x20, 0x1e, 0x1e, 0x20, 0x0a, "s_lshr_b32"),
394 (0x21, 0x21, 0x1f, 0x1f, 0x21, 0x0b, "s_lshr_b64"),
395 (0x22, 0x22, 0x20, 0x20, 0x22, 0x0c, "s_ashr_i32"),
396 (0x23, 0x23, 0x21, 0x21, 0x23, 0x0d, "s_ashr_i64"),
397 (0x24, 0x24, 0x22, 0x22, 0x24, 0x2a, "s_bfm_b32"),
398 (0x25, 0x25, 0x23, 0x23, 0x25, 0x2b, "s_bfm_b64"),
399 (0x26, 0x26, 0x24, 0x24, 0x26, 0x2c, "s_mul_i32"),
400 (0x27, 0x27, 0x25, 0x25, 0x27, 0x26, "s_bfe_u32"),
401 (0x28, 0x28, 0x26, 0x26, 0x28, 0x27, "s_bfe_i32"),
402 (0x29, 0x29, 0x27, 0x27, 0x29, 0x28, "s_bfe_u64"),
403 (0x2a, 0x2a, 0x28, 0x28, 0x2a, 0x29, "s_bfe_i64"),
404 (0x2b, 0x2b, 0x29, 0x29, -1, -1, "s_cbranch_g_fork", InstrClass.Branch),
405 (0x2c, 0x2c, 0x2a, 0x2a, 0x2c, 0x06, "s_absdiff_i32"),
406 ( -1, -1, 0x2b, 0x2b, -1, -1, "s_rfe_restore_b64", InstrClass.Branch),
407 ( -1, -1, -1, 0x2e, 0x2e, 0x0e, "s_lshl1_add_u32"),
408 ( -1, -1, -1, 0x2f, 0x2f, 0x0f, "s_lshl2_add_u32"),
409 ( -1, -1, -1, 0x30, 0x30, 0x10, "s_lshl3_add_u32"),
410 ( -1, -1, -1, 0x31, 0x31, 0x11, "s_lshl4_add_u32"),
411 ( -1, -1, -1, 0x32, 0x32, 0x32, "s_pack_ll_b32_b16"),
412 ( -1, -1, -1, 0x33, 0x33, 0x33, "s_pack_lh_b32_b16"),
413 ( -1, -1, -1, 0x34, 0x34, 0x34, "s_pack_hh_b32_b16"),
414 ( -1, -1, -1, -1, -1, 0x35, "s_pack_hl_b32_b16"),
415 ( -1, -1, -1, 0x2c, 0x35, 0x2d, "s_mul_hi_u32"),
416 ( -1, -1, -1, 0x2d, 0x36, 0x2e, "s_mul_hi_i32"),
417 # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.
418 ( -1, -1, -1, -1, -1, -1, "p_constaddr_addlo"),
419 ( -1, -1, -1, -1, -1, -1, "p_resumeaddr_addlo"),
# Register every SOP2 row. default_class() presumably supplies
# InstrClass.Salu for rows that do not name a class -- confirm.
# The GFX6 and GFX8 columns are unpacked but not forwarded: opcode() only
# accepts the GFX7, GFX9, GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOP2, InstrClass.Salu):
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10,
           opcode_gfx11=gfx11, format=Format.SOP2, cls=cls)
425 # SOPK instructions: 0 input (+ imm), 1 output + optional scc
427 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
428 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32"),
429 ( -1, -1, -1, -1, 0x01, 0x01, "s_version"),
430 (0x02, 0x02, 0x01, 0x01, 0x02, 0x02, "s_cmovk_i32"),
431 (0x03, 0x03, 0x02, 0x02, 0x03, 0x03, "s_cmpk_eq_i32"),
432 (0x04, 0x04, 0x03, 0x03, 0x04, 0x04, "s_cmpk_lg_i32"),
433 (0x05, 0x05, 0x04, 0x04, 0x05, 0x05, "s_cmpk_gt_i32"),
434 (0x06, 0x06, 0x05, 0x05, 0x06, 0x06, "s_cmpk_ge_i32"),
435 (0x07, 0x07, 0x06, 0x06, 0x07, 0x07, "s_cmpk_lt_i32"),
436 (0x08, 0x08, 0x07, 0x07, 0x08, 0x08, "s_cmpk_le_i32"),
437 (0x09, 0x09, 0x08, 0x08, 0x09, 0x09, "s_cmpk_eq_u32"),
438 (0x0a, 0x0a, 0x09, 0x09, 0x0a, 0x0a, "s_cmpk_lg_u32"),
439 (0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, "s_cmpk_gt_u32"),
440 (0x0c, 0x0c, 0x0b, 0x0b, 0x0c, 0x0c, "s_cmpk_ge_u32"),
441 (0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d, "s_cmpk_lt_u32"),
442 (0x0e, 0x0e, 0x0d, 0x0d, 0x0e, 0x0e, "s_cmpk_le_u32"),
443 (0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f, "s_addk_i32"),
444 (0x10, 0x10, 0x0f, 0x0f, 0x10, 0x10, "s_mulk_i32"),
445 (0x11, 0x11, 0x10, 0x10, -1, -1, "s_cbranch_i_fork", InstrClass.Branch),
446 (0x12, 0x12, 0x11, 0x11, 0x12, 0x11, "s_getreg_b32"),
447 (0x13, 0x13, 0x12, 0x12, 0x13, 0x12, "s_setreg_b32"),
448 (0x15, 0x15, 0x14, 0x14, 0x15, 0x13, "s_setreg_imm32_b32"), # requires 32bit literal
449 ( -1, -1, 0x15, 0x15, 0x16, 0x14, "s_call_b64", InstrClass.Branch),
450 ( -1, -1, -1, -1, 0x17, 0x18, "s_waitcnt_vscnt", InstrClass.Waitcnt),
451 ( -1, -1, -1, -1, 0x18, 0x19, "s_waitcnt_vmcnt", InstrClass.Waitcnt),
452 ( -1, -1, -1, -1, 0x19, 0x1a, "s_waitcnt_expcnt", InstrClass.Waitcnt),
453 ( -1, -1, -1, -1, 0x1a, 0x1b, "s_waitcnt_lgkmcnt", InstrClass.Waitcnt),
454 ( -1, -1, -1, -1, 0x1b, 0x16, "s_subvector_loop_begin", InstrClass.Branch),
455 ( -1, -1, -1, -1, 0x1c, 0x17, "s_subvector_loop_end", InstrClass.Branch),
# Register every SOPK row. default_class() presumably supplies
# InstrClass.Salu for rows that do not name a class -- confirm.
# The GFX6 and GFX8 columns are unpacked but not forwarded: opcode() only
# accepts the GFX7, GFX9, GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOPK, InstrClass.Salu):
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10,
           opcode_gfx11=gfx11, format=Format.SOPK, cls=cls)
461 # SOP1 instructions: 1 input, 1 output (+optional SCC)
463 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
464 (0x03, 0x03, 0x00, 0x00, 0x03, 0x00, "s_mov_b32"),
465 (0x04, 0x04, 0x01, 0x01, 0x04, 0x01, "s_mov_b64"),
466 (0x05, 0x05, 0x02, 0x02, 0x05, 0x02, "s_cmov_b32"),
467 (0x06, 0x06, 0x03, 0x03, 0x06, 0x03, "s_cmov_b64"),
468 (0x07, 0x07, 0x04, 0x04, 0x07, 0x1e, "s_not_b32"),
469 (0x08, 0x08, 0x05, 0x05, 0x08, 0x1f, "s_not_b64"),
470 (0x09, 0x09, 0x06, 0x06, 0x09, 0x1c, "s_wqm_b32"),
471 (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x1d, "s_wqm_b64"),
472 (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x04, "s_brev_b32"),
473 (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x05, "s_brev_b64"),
474 (0x0d, 0x0d, 0x0a, 0x0a, 0x0d, 0x16, "s_bcnt0_i32_b32"),
475 (0x0e, 0x0e, 0x0b, 0x0b, 0x0e, 0x17, "s_bcnt0_i32_b64"),
476 (0x0f, 0x0f, 0x0c, 0x0c, 0x0f, 0x18, "s_bcnt1_i32_b32"),
477 (0x10, 0x10, 0x0d, 0x0d, 0x10, 0x19, "s_bcnt1_i32_b64"),
478 (0x11, 0x11, 0x0e, 0x0e, 0x11, -1, "s_ff0_i32_b32"),
479 (0x12, 0x12, 0x0f, 0x0f, 0x12, -1, "s_ff0_i32_b64"),
480 (0x13, 0x13, 0x10, 0x10, 0x13, 0x08, "s_ff1_i32_b32"), #s_ctz_i32_b32 in GFX11
481 (0x14, 0x14, 0x11, 0x11, 0x14, 0x09, "s_ff1_i32_b64"), #s_ctz_i32_b64 in GFX11
482 (0x15, 0x15, 0x12, 0x12, 0x15, 0x0a, "s_flbit_i32_b32"), #s_clz_i32_u32 in GFX11
483 (0x16, 0x16, 0x13, 0x13, 0x16, 0x0b, "s_flbit_i32_b64"), #s_clz_i32_u64 in GFX11
484 (0x17, 0x17, 0x14, 0x14, 0x17, 0x0c, "s_flbit_i32"), #s_cls_i32 in GFX11
485 (0x18, 0x18, 0x15, 0x15, 0x18, 0x0d, "s_flbit_i32_i64"), #s_cls_i32_i64 in GFX11
486 (0x19, 0x19, 0x16, 0x16, 0x19, 0x0e, "s_sext_i32_i8"),
487 (0x1a, 0x1a, 0x17, 0x17, 0x1a, 0x0f, "s_sext_i32_i16"),
488 (0x1b, 0x1b, 0x18, 0x18, 0x1b, 0x10, "s_bitset0_b32"),
489 (0x1c, 0x1c, 0x19, 0x19, 0x1c, 0x11, "s_bitset0_b64"),
490 (0x1d, 0x1d, 0x1a, 0x1a, 0x1d, 0x12, "s_bitset1_b32"),
491 (0x1e, 0x1e, 0x1b, 0x1b, 0x1e, 0x13, "s_bitset1_b64"),
492 (0x1f, 0x1f, 0x1c, 0x1c, 0x1f, 0x47, "s_getpc_b64"),
493 (0x20, 0x20, 0x1d, 0x1d, 0x20, 0x48, "s_setpc_b64", InstrClass.Branch),
494 (0x21, 0x21, 0x1e, 0x1e, 0x21, 0x49, "s_swappc_b64", InstrClass.Branch),
495 (0x22, 0x22, 0x1f, 0x1f, 0x22, 0x4a, "s_rfe_b64", InstrClass.Branch),
496 (0x24, 0x24, 0x20, 0x20, 0x24, 0x21, "s_and_saveexec_b64"),
497 (0x25, 0x25, 0x21, 0x21, 0x25, 0x23, "s_or_saveexec_b64"),
498 (0x26, 0x26, 0x22, 0x22, 0x26, 0x25, "s_xor_saveexec_b64"),
499 (0x27, 0x27, 0x23, 0x23, 0x27, 0x31, "s_andn2_saveexec_b64"), #s_and_not1_saveexec_b64 in GFX11
500 (0x28, 0x28, 0x24, 0x24, 0x28, 0x33, "s_orn2_saveexec_b64"), #s_or_not1_saveexec_b64 in GFX11
501 (0x29, 0x29, 0x25, 0x25, 0x29, 0x27, "s_nand_saveexec_b64"),
502 (0x2a, 0x2a, 0x26, 0x26, 0x2a, 0x29, "s_nor_saveexec_b64"),
503 (0x2b, 0x2b, 0x27, 0x27, 0x2b, 0x2b, "s_xnor_saveexec_b64"),
504 (0x2c, 0x2c, 0x28, 0x28, 0x2c, 0x1a, "s_quadmask_b32"),
505 (0x2d, 0x2d, 0x29, 0x29, 0x2d, 0x1b, "s_quadmask_b64"),
506 (0x2e, 0x2e, 0x2a, 0x2a, 0x2e, 0x40, "s_movrels_b32"),
507 (0x2f, 0x2f, 0x2b, 0x2b, 0x2f, 0x41, "s_movrels_b64"),
508 (0x30, 0x30, 0x2c, 0x2c, 0x30, 0x42, "s_movreld_b32"),
509 (0x31, 0x31, 0x2d, 0x2d, 0x31, 0x43, "s_movreld_b64"),
510 (0x32, 0x32, 0x2e, 0x2e, -1, -1, "s_cbranch_join", InstrClass.Branch),
511 (0x34, 0x34, 0x30, 0x30, 0x34, 0x15, "s_abs_i32"),
512 (0x35, 0x35, -1, -1, 0x35, -1, "s_mov_fed_b32"),
513 ( -1, -1, 0x32, 0x32, -1, -1, "s_set_gpr_idx_idx"),
514 ( -1, -1, -1, 0x33, 0x37, 0x2d, "s_andn1_saveexec_b64"), #s_and_not0_savexec_b64 in GFX11
515 ( -1, -1, -1, 0x34, 0x38, 0x2f, "s_orn1_saveexec_b64"), #s_or_not0_savexec_b64 in GFX11
516 ( -1, -1, -1, 0x35, 0x39, 0x35, "s_andn1_wrexec_b64"), #s_and_not0_wrexec_b64 in GFX11
517 ( -1, -1, -1, 0x36, 0x3a, 0x37, "s_andn2_wrexec_b64"), #s_and_not1_wrexec_b64 in GFX11
518 ( -1, -1, -1, 0x37, 0x3b, 0x14, "s_bitreplicate_b64_b32"),
519 ( -1, -1, -1, -1, 0x3c, 0x20, "s_and_saveexec_b32"),
520 ( -1, -1, -1, -1, 0x3d, 0x22, "s_or_saveexec_b32"),
521 ( -1, -1, -1, -1, 0x3e, 0x24, "s_xor_saveexec_b32"),
522 ( -1, -1, -1, -1, 0x3f, 0x30, "s_andn2_saveexec_b32"), #s_and_not1_saveexec_b32 in GFX11
523 ( -1, -1, -1, -1, 0x40, 0x32, "s_orn2_saveexec_b32"), #s_or_not1_saveexec_b32 in GFX11
524 ( -1, -1, -1, -1, 0x41, 0x26, "s_nand_saveexec_b32"),
525 ( -1, -1, -1, -1, 0x42, 0x28, "s_nor_saveexec_b32"),
526 ( -1, -1, -1, -1, 0x43, 0x2a, "s_xnor_saveexec_b32"),
527 ( -1, -1, -1, -1, 0x44, 0x2c, "s_andn1_saveexec_b32"), #s_and_not0_savexec_b32 in GFX11
528 ( -1, -1, -1, -1, 0x45, 0x2e, "s_orn1_saveexec_b32"), #s_or_not0_savexec_b32 in GFX11
529 ( -1, -1, -1, -1, 0x46, 0x34, "s_andn1_wrexec_b32"), #s_and_not0_wrexec_b32 in GFX11
530 ( -1, -1, -1, -1, 0x47, 0x36, "s_andn2_wrexec_b32"), #s_and_not1_wrexec_b32 in GFX11
531 ( -1, -1, -1, -1, 0x49, 0x44, "s_movrelsd_2_b32"),
532 ( -1, -1, -1, -1, -1, 0x4c, "s_sendmsg_rtn_b32"),
533 ( -1, -1, -1, -1, -1, 0x4d, "s_sendmsg_rtn_b64"),
534 # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
535 ( -1, -1, -1, -1, -1, -1, "p_constaddr_getpc"),
536 ( -1, -1, -1, -1, -1, -1, "p_resumeaddr_getpc"),
537 ( -1, -1, -1, -1, -1, -1, "p_load_symbol"),
# Register every SOP1 row. default_class() presumably supplies
# InstrClass.Salu for rows that do not name a class -- confirm.
# The GFX6 and GFX8 columns are unpacked but not forwarded: opcode() only
# accepts the GFX7, GFX9, GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOP1, InstrClass.Salu):
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10,
           opcode_gfx11=gfx11, format=Format.SOP1, cls=cls)
543 # SOPC instructions: 2 inputs and 0 outputs (+SCC)
545 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
546 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32"),
547 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32"),
548 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32"),
549 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32"),
550 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32"),
551 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32"),
552 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32"),
553 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32"),
554 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32"),
555 (0x09, 0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32"),
556 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32"),
557 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32"),
558 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32"),
559 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32"),
560 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64"),
561 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64"),
562 (0x10, 0x10, 0x10, 0x10, -1, -1, "s_setvskip"),
563 ( -1, -1, 0x11, 0x11, -1, -1, "s_set_gpr_idx_on"),
564 ( -1, -1, 0x12, 0x12, 0x12, 0x10, "s_cmp_eq_u64"),
565 ( -1, -1, 0x13, 0x13, 0x13, 0x11, "s_cmp_lg_u64"),
# Register every SOPC row; all of them use InstrClass.Salu.
# The GFX6 and GFX8 columns are unpacked but not forwarded: opcode() only
# accepts the GFX7, GFX9, GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in SOPC:
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10,
           opcode_gfx11=gfx11, format=Format.SOPC, cls=InstrClass.Salu)
571 # SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs
573 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
574 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"),
575 (0x01, 0x01, 0x01, 0x01, 0x01, 0x30, "s_endpgm"),
576 (0x02, 0x02, 0x02, 0x02, 0x02, 0x20, "s_branch", InstrClass.Branch),
577 ( -1, -1, 0x03, 0x03, 0x03, 0x34, "s_wakeup"),
578 (0x04, 0x04, 0x04, 0x04, 0x04, 0x21, "s_cbranch_scc0", InstrClass.Branch),
579 (0x05, 0x05, 0x05, 0x05, 0x05, 0x22, "s_cbranch_scc1", InstrClass.Branch),
580 (0x06, 0x06, 0x06, 0x06, 0x06, 0x23, "s_cbranch_vccz", InstrClass.Branch),
581 (0x07, 0x07, 0x07, 0x07, 0x07, 0x24, "s_cbranch_vccnz", InstrClass.Branch),
582 (0x08, 0x08, 0x08, 0x08, 0x08, 0x25, "s_cbranch_execz", InstrClass.Branch),
583 (0x09, 0x09, 0x09, 0x09, 0x09, 0x26, "s_cbranch_execnz", InstrClass.Branch),
584 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x3d, "s_barrier", InstrClass.Barrier),
585 ( -1, 0x0b, 0x0b, 0x0b, 0x0b, 0x01, "s_setkill"),
586 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x09, "s_waitcnt", InstrClass.Waitcnt),
587 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x02, "s_sethalt"),
588 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x03, "s_sleep"),
589 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x35, "s_setprio"),
590 (0x10, 0x10, 0x10, 0x10, 0x10, 0x36, "s_sendmsg", InstrClass.Sendmsg),
591 (0x11, 0x11, 0x11, 0x11, 0x11, 0x37, "s_sendmsghalt", InstrClass.Sendmsg),
592 (0x12, 0x12, 0x12, 0x12, 0x12, 0x10, "s_trap", InstrClass.Branch),
593 (0x13, 0x13, 0x13, 0x13, 0x13, 0x3c, "s_icache_inv"),
594 (0x14, 0x14, 0x14, 0x14, 0x14, 0x38, "s_incperflevel"),
595 (0x15, 0x15, 0x15, 0x15, 0x15, 0x39, "s_decperflevel"),
596 (0x16, 0x16, 0x16, 0x16, 0x16, 0x3a, "s_ttracedata"),
597 ( -1, 0x17, 0x17, 0x17, 0x17, 0x27, "s_cbranch_cdbgsys", InstrClass.Branch),
598 ( -1, 0x18, 0x18, 0x18, 0x18, 0x28, "s_cbranch_cdbguser", InstrClass.Branch),
599 ( -1, 0x19, 0x19, 0x19, 0x19, 0x29, "s_cbranch_cdbgsys_or_user", InstrClass.Branch),
600 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, 0x2a, "s_cbranch_cdbgsys_and_user", InstrClass.Branch),
601 ( -1, -1, 0x1b, 0x1b, 0x1b, 0x31, "s_endpgm_saved"),
602 ( -1, -1, 0x1c, 0x1c, -1, -1, "s_set_gpr_idx_off"),
603 ( -1, -1, 0x1d, 0x1d, -1, -1, "s_set_gpr_idx_mode"),
604 ( -1, -1, -1, 0x1e, 0x1e, -1, "s_endpgm_ordered_ps_done"),
605 ( -1, -1, -1, -1, 0x1f, 0x1f, "s_code_end"),
606 ( -1, -1, -1, -1, 0x20, 0x04, "s_inst_prefetch"), #s_set_inst_prefetch_distance in GFX11
607 ( -1, -1, -1, -1, 0x21, 0x05, "s_clause"),
608 ( -1, -1, -1, -1, 0x22, 0x0a, "s_wait_idle", InstrClass.Waitcnt),
609 ( -1, -1, -1, -1, 0x23, 0x08, "s_waitcnt_depctr", InstrClass.Waitcnt),
610 ( -1, -1, -1, -1, 0x24, 0x11, "s_round_mode"),
611 ( -1, -1, -1, -1, 0x25, 0x12, "s_denorm_mode"),
612 ( -1, -1, -1, -1, 0x26, 0x3b, "s_ttracedata_imm"),
613 ( -1, -1, -1, -1, -1, 0x07, "s_delay_alu", InstrClass.Waitcnt),
614 ( -1, -1, -1, -1, -1, 0x0b, "s_wait_event"),
# Register every SOPP row. default_class() presumably supplies
# InstrClass.Salu for rows that do not name a class -- confirm.
# The GFX6 and GFX8 columns are unpacked but not forwarded: opcode() only
# accepts the GFX7, GFX9, GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) in default_class(SOPP, InstrClass.Salu):
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10,
           opcode_gfx11=gfx11, format=Format.SOPP, cls=cls)
620 # SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output
621 # Unlike GFX10, GFX10.3 does not have SMEM store, atomic or scratch instructions
623 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
624 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"), #s_load_b32 in GFX11
625 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"), #s_load_b64 in GFX11
626 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"), #s_load_b128 in GFX11
627 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"), #s_load_b256 in GFX11
628 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"), #s_load_b512 in GFX11
629 ( -1, -1, -1, 0x05, 0x05, -1, "s_scratch_load_dword"),
630 ( -1, -1, -1, 0x06, 0x06, -1, "s_scratch_load_dwordx2"),
631 ( -1, -1, -1, 0x07, 0x07, -1, "s_scratch_load_dwordx4"),
632 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"), #s_buffer_load_b32 in GFX11
633 (0x09, 0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"), #s_buffer_load_b64 in GFX11
634 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"), #s_buffer_load_b128 in GFX11
635 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"), #s_buffer_load_b256 in GFX11
636 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"), #s_buffer_load_b512 in GFX11
637 ( -1, -1, 0x10, 0x10, 0x10, -1, "s_store_dword"),
638 ( -1, -1, 0x11, 0x11, 0x11, -1, "s_store_dwordx2"),
639 ( -1, -1, 0x12, 0x12, 0x12, -1, "s_store_dwordx4"),
640 ( -1, -1, -1, 0x15, 0x15, -1, "s_scratch_store_dword"),
641 ( -1, -1, -1, 0x16, 0x16, -1, "s_scratch_store_dwordx2"),
642 ( -1, -1, -1, 0x17, 0x17, -1, "s_scratch_store_dwordx4"),
643 ( -1, -1, 0x18, 0x18, 0x18, -1, "s_buffer_store_dword"),
644 ( -1, -1, 0x19, 0x19, 0x19, -1, "s_buffer_store_dwordx2"),
645 ( -1, -1, 0x1a, 0x1a, 0x1a, -1, "s_buffer_store_dwordx4"),
646 ( -1, -1, 0x1f, 0x1f, 0x1f, 0x20, "s_gl1_inv"),
647 (0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21, "s_dcache_inv"),
648 ( -1, -1, 0x21, 0x21, 0x21, -1, "s_dcache_wb"),
649 ( -1, 0x1d, 0x22, 0x22, -1, -1, "s_dcache_inv_vol"),
650 ( -1, -1, 0x23, 0x23, -1, -1, "s_dcache_wb_vol"),
651 (0x1e, 0x1e, 0x24, 0x24, 0x24, -1, "s_memtime"), #GFX6-GFX10
652 ( -1, -1, 0x25, 0x25, 0x25, -1, "s_memrealtime"),
653 ( -1, -1, 0x26, 0x26, 0x26, 0x22, "s_atc_probe"),
654 ( -1, -1, 0x27, 0x27, 0x27, 0x23, "s_atc_probe_buffer"),
655 ( -1, -1, -1, 0x28, 0x28, -1, "s_dcache_discard"),
656 ( -1, -1, -1, 0x29, 0x29, -1, "s_dcache_discard_x2"),
657 ( -1, -1, -1, -1, 0x2a, -1, "s_get_waveid_in_workgroup"),
658 ( -1, -1, -1, 0x40, 0x40, -1, "s_buffer_atomic_swap"),
659 ( -1, -1, -1, 0x41, 0x41, -1, "s_buffer_atomic_cmpswap"),
660 ( -1, -1, -1, 0x42, 0x42, -1, "s_buffer_atomic_add"),
661 ( -1, -1, -1, 0x43, 0x43, -1, "s_buffer_atomic_sub"),
662 ( -1, -1, -1, 0x44, 0x44, -1, "s_buffer_atomic_smin"),
663 ( -1, -1, -1, 0x45, 0x45, -1, "s_buffer_atomic_umin"),
664 ( -1, -1, -1, 0x46, 0x46, -1, "s_buffer_atomic_smax"),
665 ( -1, -1, -1, 0x47, 0x47, -1, "s_buffer_atomic_umax"),
666 ( -1, -1, -1, 0x48, 0x48, -1, "s_buffer_atomic_and"),
667 ( -1, -1, -1, 0x49, 0x49, -1, "s_buffer_atomic_or"),
668 ( -1, -1, -1, 0x4a, 0x4a, -1, "s_buffer_atomic_xor"),
669 ( -1, -1, -1, 0x4b, 0x4b, -1, "s_buffer_atomic_inc"),
670 ( -1, -1, -1, 0x4c, 0x4c, -1, "s_buffer_atomic_dec"),
671 ( -1, -1, -1, 0x60, 0x60, -1, "s_buffer_atomic_swap_x2"),
672 ( -1, -1, -1, 0x61, 0x61, -1, "s_buffer_atomic_cmpswap_x2"),
673 ( -1, -1, -1, 0x62, 0x62, -1, "s_buffer_atomic_add_x2"),
674 ( -1, -1, -1, 0x63, 0x63, -1, "s_buffer_atomic_sub_x2"),
675 ( -1, -1, -1, 0x64, 0x64, -1, "s_buffer_atomic_smin_x2"),
676 ( -1, -1, -1, 0x65, 0x65, -1, "s_buffer_atomic_umin_x2"),
677 ( -1, -1, -1, 0x66, 0x66, -1, "s_buffer_atomic_smax_x2"),
678 ( -1, -1, -1, 0x67, 0x67, -1, "s_buffer_atomic_umax_x2"),
679 ( -1, -1, -1, 0x68, 0x68, -1, "s_buffer_atomic_and_x2"),
680 ( -1, -1, -1, 0x69, 0x69, -1, "s_buffer_atomic_or_x2"),
681 ( -1, -1, -1, 0x6a, 0x6a, -1, "s_buffer_atomic_xor_x2"),
682 ( -1, -1, -1, 0x6b, 0x6b, -1, "s_buffer_atomic_inc_x2"),
683 ( -1, -1, -1, 0x6c, 0x6c, -1, "s_buffer_atomic_dec_x2"),
684 ( -1, -1, -1, 0x80, 0x80, -1, "s_atomic_swap"),
685 ( -1, -1, -1, 0x81, 0x81, -1, "s_atomic_cmpswap"),
686 ( -1, -1, -1, 0x82, 0x82, -1, "s_atomic_add"),
687 ( -1, -1, -1, 0x83, 0x83, -1, "s_atomic_sub"),
688 ( -1, -1, -1, 0x84, 0x84, -1, "s_atomic_smin"),
689 ( -1, -1, -1, 0x85, 0x85, -1, "s_atomic_umin"),
690 ( -1, -1, -1, 0x86, 0x86, -1, "s_atomic_smax"),
691 ( -1, -1, -1, 0x87, 0x87, -1, "s_atomic_umax"),
692 ( -1, -1, -1, 0x88, 0x88, -1, "s_atomic_and"),
693 ( -1, -1, -1, 0x89, 0x89, -1, "s_atomic_or"),
694 ( -1, -1, -1, 0x8a, 0x8a, -1, "s_atomic_xor"),
695 ( -1, -1, -1, 0x8b, 0x8b, -1, "s_atomic_inc"),
696 ( -1, -1, -1, 0x8c, 0x8c, -1, "s_atomic_dec"),
697 ( -1, -1, -1, 0xa0, 0xa0, -1, "s_atomic_swap_x2"),
698 ( -1, -1, -1, 0xa1, 0xa1, -1, "s_atomic_cmpswap_x2"),
699 ( -1, -1, -1, 0xa2, 0xa2, -1, "s_atomic_add_x2"),
700 ( -1, -1, -1, 0xa3, 0xa3, -1, "s_atomic_sub_x2"),
701 ( -1, -1, -1, 0xa4, 0xa4, -1, "s_atomic_smin_x2"),
702 ( -1, -1, -1, 0xa5, 0xa5, -1, "s_atomic_umin_x2"),
703 ( -1, -1, -1, 0xa6, 0xa6, -1, "s_atomic_smax_x2"),
704 ( -1, -1, -1, 0xa7, 0xa7, -1, "s_atomic_umax_x2"),
705 ( -1, -1, -1, 0xa8, 0xa8, -1, "s_atomic_and_x2"),
706 ( -1, -1, -1, 0xa9, 0xa9, -1, "s_atomic_or_x2"),
707 ( -1, -1, -1, 0xaa, 0xaa, -1, "s_atomic_xor_x2"),
708 ( -1, -1, -1, 0xab, 0xab, -1, "s_atomic_inc_x2"),
709 ( -1, -1, -1, 0xac, 0xac, -1, "s_atomic_dec_x2"),
# Register every SMEM row with InstrClass.SMem. A row is flagged atomic
# when its name contains the substring "atomic".
# The GFX6 and GFX8 columns are unpacked but not forwarded: opcode() only
# accepts the GFX7, GFX9, GFX10 and GFX11 encodings.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in SMEM:
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10,
           opcode_gfx11=gfx11, format=Format.SMEM, cls=InstrClass.SMem,
           is_atomic="atomic" in name)
715 # VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
716 # TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8
718 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input modifiers, output modifiers
719 (0x00, 0x00, 0x00, 0x00, 0x01, 0x01, "v_cndmask_b32", True, False),
720 (0x01, 0x01, -1, -1, -1, -1, "v_readlane_b32", False, False),
721 (0x02, 0x02, -1, -1, -1, -1, "v_writelane_b32", False, False),
722 (0x03, 0x03, 0x01, 0x01, 0x03, 0x03, "v_add_f32", True, True),
723 (0x04, 0x04, 0x02, 0x02, 0x04, 0x04, "v_sub_f32", True, True),
724 (0x05, 0x05, 0x03, 0x03, 0x05, 0x05, "v_subrev_f32", True, True),
725 (0x06, 0x06, -1, -1, 0x06, -1, "v_mac_legacy_f32", True, True), #GFX6,7,10
726 ( -1, -1, -1, -1, 0x06, 0x06, "v_fmac_legacy_f32", True, True), #GFX10.3+, v_fmac_dx9_zero_f32 in GFX11
727 (0x07, 0x07, 0x04, 0x04, 0x07, 0x07, "v_mul_legacy_f32", True, True), #v_mul_dx9_zero_f32 in GFX11
728 (0x08, 0x08, 0x05, 0x05, 0x08, 0x08, "v_mul_f32", True, True),
729 (0x09, 0x09, 0x06, 0x06, 0x09, 0x09, "v_mul_i32_i24", False, False),
730 (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x0a, "v_mul_hi_i32_i24", False, False),
731 (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x0b, "v_mul_u32_u24", False, False),
732 (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x0c, "v_mul_hi_u32_u24", False, False),
733 ( -1, -1, -1, 0x39, 0x0d, -1, "v_dot4c_i32_i8", False, False),
734 (0x0d, 0x0d, -1, -1, -1, -1, "v_min_legacy_f32", True, True),
735 (0x0e, 0x0e, -1, -1, -1, -1, "v_max_legacy_f32", True, True),
736 (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, 0x0f, "v_min_f32", True, True),
737 (0x10, 0x10, 0x0b, 0x0b, 0x10, 0x10, "v_max_f32", True, True),
738 (0x11, 0x11, 0x0c, 0x0c, 0x11, 0x11, "v_min_i32", False, False),
739 (0x12, 0x12, 0x0d, 0x0d, 0x12, 0x12, "v_max_i32", False, False),
740 (0x13, 0x13, 0x0e, 0x0e, 0x13, 0x13, "v_min_u32", False, False),
741 (0x14, 0x14, 0x0f, 0x0f, 0x14, 0x14, "v_max_u32", False, False),
742 (0x15, 0x15, -1, -1, -1, -1, "v_lshr_b32", False, False),
743 (0x16, 0x16, 0x10, 0x10, 0x16, 0x19, "v_lshrrev_b32", False, False),
744 (0x17, 0x17, -1, -1, -1, -1, "v_ashr_i32", False, False),
745 (0x18, 0x18, 0x11, 0x11, 0x18, 0x1a, "v_ashrrev_i32", False, False),
746 (0x19, 0x19, -1, -1, -1, -1, "v_lshl_b32", False, False),
747 (0x1a, 0x1a, 0x12, 0x12, 0x1a, 0x18, "v_lshlrev_b32", False, False),
748 (0x1b, 0x1b, 0x13, 0x13, 0x1b, 0x1b, "v_and_b32", False, False),
749 (0x1c, 0x1c, 0x14, 0x14, 0x1c, 0x1c, "v_or_b32", False, False),
750 (0x1d, 0x1d, 0x15, 0x15, 0x1d, 0x1d, "v_xor_b32", False, False),
751 ( -1, -1, -1, -1, 0x1e, 0x1e, "v_xnor_b32", False, False),
752 (0x1f, 0x1f, 0x16, 0x16, 0x1f, -1, "v_mac_f32", True, True),
753 (0x20, 0x20, 0x17, 0x17, 0x20, -1, "v_madmk_f32", False, False),
754 (0x21, 0x21, 0x18, 0x18, 0x21, -1, "v_madak_f32", False, False),
755 (0x24, 0x24, -1, -1, -1, -1, "v_mbcnt_hi_u32_b32", False, False),
756 (0x25, 0x25, 0x19, 0x19, -1, -1, "v_add_co_u32", False, False), # VOP3B only in RDNA
757 (0x26, 0x26, 0x1a, 0x1a, -1, -1, "v_sub_co_u32", False, False), # VOP3B only in RDNA
758 (0x27, 0x27, 0x1b, 0x1b, -1, -1, "v_subrev_co_u32", False, False), # VOP3B only in RDNA
759 (0x28, 0x28, 0x1c, 0x1c, 0x28, 0x20, "v_addc_co_u32", False, False), # v_add_co_ci_u32 in RDNA
760 (0x29, 0x29, 0x1d, 0x1d, 0x29, 0x21, "v_subb_co_u32", False, False), # v_sub_co_ci_u32 in RDNA
761 (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, 0x22, "v_subbrev_co_u32", False, False), # v_subrev_co_ci_u32 in RDNA
762 ( -1, -1, -1, -1, 0x2b, 0x2b, "v_fmac_f32", True, True),
763 ( -1, -1, -1, -1, 0x2c, 0x2c, "v_fmamk_f32", False, False),
764 ( -1, -1, -1, -1, 0x2d, 0x2d, "v_fmaak_f32", False, False),
765 (0x2f, 0x2f, -1, -1, 0x2f, 0x2f, "v_cvt_pkrtz_f16_f32", True, False), #v_cvt_pk_rtz_f16_f32 in GFX11
766 ( -1, -1, 0x1f, 0x1f, 0x32, 0x32, "v_add_f16", True, True),
767 ( -1, -1, 0x20, 0x20, 0x33, 0x33, "v_sub_f16", True, True),
768 ( -1, -1, 0x21, 0x21, 0x34, 0x34, "v_subrev_f16", True, True),
769 ( -1, -1, 0x22, 0x22, 0x35, 0x35, "v_mul_f16", True, True),
770 ( -1, -1, 0x23, 0x23, -1, -1, "v_mac_f16", True, True),
771 ( -1, -1, 0x24, 0x24, -1, -1, "v_madmk_f16", False, False),
772 ( -1, -1, 0x25, 0x25, -1, -1, "v_madak_f16", False, False),
773 ( -1, -1, 0x26, 0x26, -1, -1, "v_add_u16", False, False),
774 ( -1, -1, 0x27, 0x27, -1, -1, "v_sub_u16", False, False),
775 ( -1, -1, 0x28, 0x28, -1, -1, "v_subrev_u16", False, False),
776 ( -1, -1, 0x29, 0x29, -1, -1, "v_mul_lo_u16", False, False),
777 ( -1, -1, 0x2a, 0x2a, -1, -1, "v_lshlrev_b16", False, False),
778 ( -1, -1, 0x2b, 0x2b, -1, -1, "v_lshrrev_b16", False, False),
779 ( -1, -1, 0x2c, 0x2c, -1, -1, "v_ashrrev_i16", False, False),
780 ( -1, -1, 0x2d, 0x2d, 0x39, 0x39, "v_max_f16", True, True),
781 ( -1, -1, 0x2e, 0x2e, 0x3a, 0x3a, "v_min_f16", True, True),
782 ( -1, -1, 0x2f, 0x2f, -1, -1, "v_max_u16", False, False),
783 ( -1, -1, 0x30, 0x30, -1, -1, "v_max_i16", False, False),
784 ( -1, -1, 0x31, 0x31, -1, -1, "v_min_u16", False, False),
785 ( -1, -1, 0x32, 0x32, -1, -1, "v_min_i16", False, False),
786 ( -1, -1, 0x33, 0x33, 0x3b, 0x3b, "v_ldexp_f16", False, True),
787 ( -1, -1, -1, 0x34, 0x25, 0x25, "v_add_u32", False, False), # called v_add_nc_u32 in RDNA
788 ( -1, -1, -1, 0x35, 0x26, 0x26, "v_sub_u32", False, False), # called v_sub_nc_u32 in RDNA
789 ( -1, -1, -1, 0x36, 0x27, 0x27, "v_subrev_u32", False, False), # called v_subrev_nc_u32 in RDNA
790 ( -1, -1, -1, -1, 0x36, 0x36, "v_fmac_f16", True, True),
791 ( -1, -1, -1, -1, 0x37, 0x37, "v_fmamk_f16", False, False),
792 ( -1, -1, -1, -1, 0x38, 0x38, "v_fmaak_f16", False, False),
793 ( -1, -1, -1, -1, 0x3c, 0x3c, "v_pk_fmac_f16", False, False),
794 ( -1, -1, -1, 0x37, 0x02, 0x02, "v_dot2c_f32_f16", False, False), #v_dot2acc_f32_f16 in GFX11
# Register every VOP2 row with the 32-bit VALU class. The GFX6 and GFX8
# columns are unpacked but not passed to opcode(); in_mod/out_mod are the
# row's input/output modifier flags.
for vop2_row in VOP2:
   (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod) = vop2_row
   opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP2, InstrClass.Valu32, in_mod, out_mod)
800 # VOP1 instructions: instructions with 1 input and 1 output
802 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input_modifiers, output_modifiers
803 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False),
804 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False),
805 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False),
806 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False, InstrClass.ValuDoubleConvert),
807 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True, InstrClass.ValuDoubleConvert),
808 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True),
809 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True),
810 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False),
811 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
812 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
813 ( -1, -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True),
814 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
815 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False), #v_cvt_nearest_i32_f32 in GFX11
816 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),#v_cvt_floor_i32_f32 in GFX11
817 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True),
818 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True, InstrClass.ValuDoubleConvert),
819 (0x10, 0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True, InstrClass.ValuDoubleConvert),
820 (0x11, 0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True),
821 (0x12, 0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True),
822 (0x13, 0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True),
823 (0x14, 0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True),
824 (0x15, 0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False, InstrClass.ValuDoubleConvert),
825 (0x16, 0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True, InstrClass.ValuDoubleConvert),
826 ( -1, 0x17, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True, InstrClass.ValuDouble),
827 ( -1, 0x18, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True, InstrClass.ValuDouble),
828 ( -1, 0x19, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True, InstrClass.ValuDouble),
829 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True, InstrClass.ValuDouble),
830 ( -1, -1, -1, -1, 0x1b, 0x1b, "v_pipeflush", False, False),
831 (0x20, 0x20, 0x1b, 0x1b, 0x20, 0x20, "v_fract_f32", True, True),
832 (0x21, 0x21, 0x1c, 0x1c, 0x21, 0x21, "v_trunc_f32", True, True),
833 (0x22, 0x22, 0x1d, 0x1d, 0x22, 0x22, "v_ceil_f32", True, True),
834 (0x23, 0x23, 0x1e, 0x1e, 0x23, 0x23, "v_rndne_f32", True, True),
835 (0x24, 0x24, 0x1f, 0x1f, 0x24, 0x24, "v_floor_f32", True, True),
836 (0x25, 0x25, 0x20, 0x20, 0x25, 0x25, "v_exp_f32", True, True, InstrClass.ValuTranscendental32),
837 (0x26, 0x26, -1, -1, -1, -1, "v_log_clamp_f32", True, True, InstrClass.ValuTranscendental32),
838 (0x27, 0x27, 0x21, 0x21, 0x27, 0x27, "v_log_f32", True, True, InstrClass.ValuTranscendental32),
839 (0x28, 0x28, -1, -1, -1, -1, "v_rcp_clamp_f32", True, True, InstrClass.ValuTranscendental32),
840 (0x29, 0x29, -1, -1, -1, -1, "v_rcp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
841 (0x2a, 0x2a, 0x22, 0x22, 0x2a, 0x2a, "v_rcp_f32", True, True, InstrClass.ValuTranscendental32),
842 (0x2b, 0x2b, 0x23, 0x23, 0x2b, 0x2b, "v_rcp_iflag_f32", True, True, InstrClass.ValuTranscendental32),
843 (0x2c, 0x2c, -1, -1, -1, -1, "v_rsq_clamp_f32", True, True, InstrClass.ValuTranscendental32),
844 (0x2d, 0x2d, -1, -1, -1, -1, "v_rsq_legacy_f32", True, True, InstrClass.ValuTranscendental32),
845 (0x2e, 0x2e, 0x24, 0x24, 0x2e, 0x2e, "v_rsq_f32", True, True, InstrClass.ValuTranscendental32),
846 (0x2f, 0x2f, 0x25, 0x25, 0x2f, 0x2f, "v_rcp_f64", True, True, InstrClass.ValuDoubleTranscendental),
847 (0x30, 0x30, -1, -1, -1, -1, "v_rcp_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
848 (0x31, 0x31, 0x26, 0x26, 0x31, 0x31, "v_rsq_f64", True, True, InstrClass.ValuDoubleTranscendental),
849 (0x32, 0x32, -1, -1, -1, -1, "v_rsq_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
850 (0x33, 0x33, 0x27, 0x27, 0x33, 0x33, "v_sqrt_f32", True, True, InstrClass.ValuTranscendental32),
851 (0x34, 0x34, 0x28, 0x28, 0x34, 0x34, "v_sqrt_f64", True, True, InstrClass.ValuDoubleTranscendental),
852 (0x35, 0x35, 0x29, 0x29, 0x35, 0x35, "v_sin_f32", True, True, InstrClass.ValuTranscendental32),
853 (0x36, 0x36, 0x2a, 0x2a, 0x36, 0x36, "v_cos_f32", True, True, InstrClass.ValuTranscendental32),
854 (0x37, 0x37, 0x2b, 0x2b, 0x37, 0x37, "v_not_b32", False, False),
855 (0x38, 0x38, 0x2c, 0x2c, 0x38, 0x38, "v_bfrev_b32", False, False),
856 (0x39, 0x39, 0x2d, 0x2d, 0x39, 0x39, "v_ffbh_u32", False, False), #v_clz_i32_u32 in GFX11
857 (0x3a, 0x3a, 0x2e, 0x2e, 0x3a, 0x3a, "v_ffbl_b32", False, False), #v_ctz_i32_b32 in GFX11
858 (0x3b, 0x3b, 0x2f, 0x2f, 0x3b, 0x3b, "v_ffbh_i32", False, False), #v_cls_i32 in GFX11
859 (0x3c, 0x3c, 0x30, 0x30, 0x3c, 0x3c, "v_frexp_exp_i32_f64", True, False, InstrClass.ValuDouble),
860 (0x3d, 0x3d, 0x31, 0x31, 0x3d, 0x3d, "v_frexp_mant_f64", True, False, InstrClass.ValuDouble),
861 (0x3e, 0x3e, 0x32, 0x32, 0x3e, 0x3e, "v_fract_f64", True, True, InstrClass.ValuDouble),
862 (0x3f, 0x3f, 0x33, 0x33, 0x3f, 0x3f, "v_frexp_exp_i32_f32", True, False),
863 (0x40, 0x40, 0x34, 0x34, 0x40, 0x40, "v_frexp_mant_f32", True, False),
864 (0x41, 0x41, 0x35, 0x35, 0x41, -1, "v_clrexcp", False, False),
865 (0x42, 0x42, 0x36, -1, 0x42, 0x42, "v_movreld_b32", False, False),
866 (0x43, 0x43, 0x37, -1, 0x43, 0x43, "v_movrels_b32", False, False),
867 (0x44, 0x44, 0x38, -1, 0x44, 0x44, "v_movrelsd_b32", False, False),
868 ( -1, -1, -1, -1, 0x48, 0x48, "v_movrelsd_2_b32", False, False),
869 ( -1, -1, -1, 0x37, -1, -1, "v_screen_partition_4se_b32", False, False),
870 ( -1, -1, 0x39, 0x39, 0x50, 0x50, "v_cvt_f16_u16", False, True),
871 ( -1, -1, 0x3a, 0x3a, 0x51, 0x51, "v_cvt_f16_i16", False, True),
872 ( -1, -1, 0x3b, 0x3b, 0x52, 0x52, "v_cvt_u16_f16", True, False),
873 ( -1, -1, 0x3c, 0x3c, 0x53, 0x53, "v_cvt_i16_f16", True, False),
874 ( -1, -1, 0x3d, 0x3d, 0x54, 0x54, "v_rcp_f16", True, True, InstrClass.ValuTranscendental32),
875 ( -1, -1, 0x3e, 0x3e, 0x55, 0x55, "v_sqrt_f16", True, True, InstrClass.ValuTranscendental32),
876 ( -1, -1, 0x3f, 0x3f, 0x56, 0x56, "v_rsq_f16", True, True, InstrClass.ValuTranscendental32),
877 ( -1, -1, 0x40, 0x40, 0x57, 0x57, "v_log_f16", True, True, InstrClass.ValuTranscendental32),
878 ( -1, -1, 0x41, 0x41, 0x58, 0x58, "v_exp_f16", True, True, InstrClass.ValuTranscendental32),
879 ( -1, -1, 0x42, 0x42, 0x59, 0x59, "v_frexp_mant_f16", True, False),
880 ( -1, -1, 0x43, 0x43, 0x5a, 0x5a, "v_frexp_exp_i16_f16", True, False),
881 ( -1, -1, 0x44, 0x44, 0x5b, 0x5b, "v_floor_f16", True, True),
882 ( -1, -1, 0x45, 0x45, 0x5c, 0x5c, "v_ceil_f16", True, True),
883 ( -1, -1, 0x46, 0x46, 0x5d, 0x5d, "v_trunc_f16", True, True),
884 ( -1, -1, 0x47, 0x47, 0x5e, 0x5e, "v_rndne_f16", True, True),
885 ( -1, -1, 0x48, 0x48, 0x5f, 0x5f, "v_fract_f16", True, True),
886 ( -1, -1, 0x49, 0x49, 0x60, 0x60, "v_sin_f16", True, True, InstrClass.ValuTranscendental32),
887 ( -1, -1, 0x4a, 0x4a, 0x61, 0x61, "v_cos_f16", True, True, InstrClass.ValuTranscendental32),
888 ( -1, 0x46, 0x4b, 0x4b, -1, -1, "v_exp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
889 ( -1, 0x45, 0x4c, 0x4c, -1, -1, "v_log_legacy_f32", True, True, InstrClass.ValuTranscendental32),
890 ( -1, -1, -1, 0x4f, 0x62, 0x62, "v_sat_pk_u8_i16", False, False),
891 ( -1, -1, -1, 0x4d, 0x63, 0x63, "v_cvt_norm_i16_f16", True, False),
892 ( -1, -1, -1, 0x4e, 0x64, 0x64, "v_cvt_norm_u16_f16", True, False),
893 ( -1, -1, -1, 0x51, 0x65, 0x65, "v_swap_b32", False, False),
894 ( -1, -1, -1, -1, 0x68, 0x68, "v_swaprel_b32", False, False),
895 ( -1, -1, -1, -1, -1, 0x67, "v_permlane64_b32", False, False), #cannot use VOP3
896 ( -1, -1, -1, -1, -1, 0x69, "v_not_b16", False, False),
897 ( -1, -1, -1, -1, -1, 0x6a, "v_cvt_i32_i16", False, False),
898 ( -1, -1, -1, -1, -1, 0x6b, "v_cvt_u32_u16", False, False),
899 ( -1, -1, -1, -1, -1, 0x1c, "v_mov_b16", True, False),
# Register every VOP1 row. Rows in the table have either 9 or 10 fields (the
# 10th being an explicit instruction class); default_class() is expected to
# yield 10-field rows -- presumably by appending InstrClass.Valu32 to rows
# that omit the class (confirm against its definition).
vop1_rows = default_class(VOP1, InstrClass.Valu32)
for vop1_row in vop1_rows:
   (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, cls) = vop1_row
   opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP1, cls, in_mod, out_mod)
908 (0x88, 0x88, 0x10, 0x10, 0x88, 0x7e, "v_cmp_class_f32"),
909 ( -1, -1, 0x14, 0x14, 0x8f, 0x7d, "v_cmp_class_f16"),
910 (0x98, 0x98, 0x11, 0x11, 0x98, 0xfe, "v_cmpx_class_f32"),
911 ( -1, -1, 0x15, 0x15, 0x9f, 0xfd, "v_cmpx_class_f16"),
912 (0xa8, 0xa8, 0x12, 0x12, 0xa8, 0x7f, "v_cmp_class_f64", InstrClass.ValuDouble),
913 (0xb8, 0xb8, 0x13, 0x13, 0xb8, 0xff, "v_cmpx_class_f64", InstrClass.ValuDouble),
# Register the v_cmp(x)_class_* opcodes. Rows may carry an optional trailing
# instruction class; default_class() is expected to yield 8-field rows
# (presumably filling in InstrClass.Valu32 where the class is omitted).
# The two trailing positional flags are True (input modifiers) and False
# (output modifiers), in the same positions as in_mod/out_mod elsewhere.
vopc_class_rows = default_class(VOPC_CLASS, InstrClass.Valu32)
for vopc_class_row in vopc_class_rows:
   (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, cls) = vopc_class_row
   opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, cls, True, False)
918 COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"]
921 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0x20+i, 0x20+i, 0xc8+i, 0x00+i, "v_cmp_"+COMPF[i]+"_f16")
922 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False)
923 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0x30+i, 0x30+i, 0xd8+i, 0x80+i, "v_cmpx_"+COMPF[i]+"_f16")
924 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False)
925 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0x28+i, 0x28+i, 0xe8+i, 0x08+i, "v_cmp_"+COMPF[i+8]+"_f16")
926 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False)
927 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0x38+i, 0x38+i, 0xf8+i, 0x88+i, "v_cmpx_"+COMPF[i+8]+"_f16")
928 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False)
931 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, 0x10+i, "v_cmp_"+COMPF[i]+"_f32")
932 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False)
933 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, 0x90+i, "v_cmpx_"+COMPF[i]+"_f32")
934 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False)
935 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, 0x20+i, "v_cmp_"+COMPF[i]+"_f64")
936 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.ValuDouble, True, False)
937 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, 0xa0+i, "v_cmpx_"+COMPF[i]+"_f64")
938 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.ValuDouble, True, False)
940 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x40+i, 0x40+i, -1, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32")
941 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x50+i, 0x50+i, -1, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32")
942 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x60+i, 0x60+i, -1, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f64")
943 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x70+i, 0x70+i, -1, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f64")
945 COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]
# 16-bit integer compares for COMPI[0] ("f") and COMPI[7] ("tru").
# These are registered with a GFX9 opcode only: the GFX7, GFX10 and GFX11
# arguments are all -1. (The GFX8 encoding is the same number as GFX9, but
# opcode() takes no GFX8 argument.)
for i in (0, 7): # only 0 and 7
   cond = COMPI[i]
   opcode("v_cmp_"+cond+"_i16", -1, 0xa0+i, -1, -1, Format.VOPC, InstrClass.Valu32)
   opcode("v_cmpx_"+cond+"_i16", -1, 0xb0+i, -1, -1, Format.VOPC, InstrClass.Valu32)
   opcode("v_cmp_"+cond+"_u16", -1, 0xa8+i, -1, -1, Format.VOPC, InstrClass.Valu32)
   opcode("v_cmpx_"+cond+"_u16", -1, 0xb8+i, -1, -1, Format.VOPC, InstrClass.Valu32)
# 16-bit integer compares for COMPI[1] .. COMPI[6] ("lt" .. "ge").
# Each row gives the mnemonic prefix/suffix and the base opcode for GFX9,
# GFX10 and GFX11; the comparison index i is added to every base. The GFX7
# argument is always -1. (The GFX8 encoding is the same number as GFX9, but
# opcode() takes no GFX8 argument.)
# Registration order per i is kept: cmp_i16, cmpx_i16, cmp_u16, cmpx_u16.
for i in range(1, 7): # [1..6]
   for (prefix, suffix, base9, base10, base11) in (
         ("v_cmp_",  "_i16", 0xa0, 0x88, 0x30),
         ("v_cmpx_", "_i16", 0xb0, 0x98, 0xb0),
         ("v_cmp_",  "_u16", 0xa8, 0xa8, 0x38),
         ("v_cmpx_", "_u16", 0xb8, 0xb8, 0xb8)):
      opcode(prefix+COMPI[i]+suffix, -1, base9+i, base10+i, base11+i, Format.VOPC, InstrClass.Valu32)
969 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, 0x40+i, "v_cmp_"+COMPI[i]+"_i32")
970 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32)
971 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, 0xc0+i, "v_cmpx_"+COMPI[i]+"_i32")
972 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32)
973 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, 0x50+i, "v_cmp_"+COMPI[i]+"_i64")
974 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu64)
975 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, 0xd0+i, "v_cmpx_"+COMPI[i]+"_i64")
976 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu64)
977 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, 0x48+i, "v_cmp_"+COMPI[i]+"_u32")
978 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32)
979 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, 0xc8+i, "v_cmpx_"+COMPI[i]+"_u32")
980 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32)
981 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, 0x58+i, "v_cmp_"+COMPI[i]+"_u64")
982 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu64)
983 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, 0xd8+i, "v_cmpx_"+COMPI[i]+"_u64")
984 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu64)
987 # VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output
989 # opcode, name, input/output modifiers
990 (0x00, "v_pk_mad_i16", False),
991 (0x01, "v_pk_mul_lo_u16", False),
992 (0x02, "v_pk_add_i16", False),
993 (0x03, "v_pk_sub_i16", False),
994 (0x04, "v_pk_lshlrev_b16", False),
995 (0x05, "v_pk_lshrrev_b16", False),
996 (0x06, "v_pk_ashrrev_i16", False),
997 (0x07, "v_pk_max_i16", False),
998 (0x08, "v_pk_min_i16", False),
999 (0x09, "v_pk_mad_u16", False),
1000 (0x0a, "v_pk_add_u16", False),
1001 (0x0b, "v_pk_sub_u16", False),
1002 (0x0c, "v_pk_max_u16", False),
1003 (0x0d, "v_pk_min_u16", False),
1004 (0x0e, "v_pk_fma_f16", True),
1005 (0x0f, "v_pk_add_f16", True),
1006 (0x10, "v_pk_mul_f16", True),
1007 (0x11, "v_pk_min_f16", True),
1008 (0x12, "v_pk_max_f16", True),
1009 (0x20, "v_fma_mix_f32", True), # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA
1010 (0x21, "v_fma_mixlo_f16", True), # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA
1011 (0x22, "v_fma_mixhi_f16", True), # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA
# Note: these packed instructions are only supported on GFX9 and newer, so
# GFX8 and GFX9 need to be distinguished here. The GFX7 argument is -1 and
# the same opcode number is used for GFX9, GFX10 and GFX11:
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, -1, code, code, code, name)
# The single per-row flag is passed as both the input- and output-modifier argument.
for vopp_row in VOPP:
   (code, name, modifiers) = vopp_row
   opcode(name, -1, code, code, code, Format.VOP3P, InstrClass.Valu32, modifiers, modifiers)
# VOP3P dot-product opcodes whose encoding differs between generations.
# Columns: name, GFX9 opcode, GFX10 opcode, GFX11 opcode (-1 = not available).
# The GFX7 argument is always -1; rows are registered in the order listed.
for (dot_name, dot_gfx9, dot_gfx10, dot_gfx11) in (
      ("v_dot2_i32_i16",    0x26, 0x14,   -1),
      ("v_dot2_u32_u16",    0x27, 0x15,   -1),
      ("v_dot4_i32_iu8",      -1,   -1, 0x16),
      ("v_dot4_i32_i8",     0x28, 0x16,   -1),
      ("v_dot4_u32_u8",     0x29, 0x17, 0x17),
      ("v_dot8_i32_iu4",      -1,   -1, 0x18),
      ("v_dot8_u32_u4",     0x2b, 0x19, 0x19),
      ("v_dot2_f32_f16",    0x23, 0x13, 0x13),
      ("v_dot2_f32_bf16",     -1,   -1, 0x1a)):
   opcode(dot_name, -1, dot_gfx9, dot_gfx10, dot_gfx11, Format.VOP3P, InstrClass.Valu32)
1028 # VINTRP (GFX6 - GFX10.3) instructions:
1030 (0x00, "v_interp_p1_f32"),
1031 (0x01, "v_interp_p2_f32"),
1032 (0x02, "v_interp_mov_f32"),
# The same opcode number is used for every generation up to GFX10; the GFX11
# argument is -1 because this encoding is not registered for GFX11:
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, -1, name)
for vintrp_row in VINTRP:
   (code, name) = vintrp_row
   opcode(name, code, code, code, -1, Format.VINTRP, InstrClass.Valu32)
1039 # VINTERP (GFX11+) instructions:
1041 (0x00, "v_interp_p10_f32_inreg"),
1042 (0x01, "v_interp_p2_f32_inreg"),
1043 (0x02, "v_interp_p10_f16_f32_inreg"),
1044 (0x03, "v_interp_p2_f16_f32_inreg"),
1045 (0x04, "v_interp_p10_rtz_f16_f32_inreg"),
1046 (0x05, "v_interp_p2_rtz_f16_f32_inreg"),
# Register the VINTERP rows with a GFX11 opcode only; the GFX7, GFX9 and
# GFX10 arguments are all -1.
for vinterp_row in VINTERP:
   (code, name) = vinterp_row
   opcode(name, -1, -1, -1, code, Format.VINTERP_INREG, InstrClass.Valu32)
1052 # VOP3 instructions: 3 inputs, 1 output
1053 # VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out
1055 (0x140, 0x140, 0x1c0, 0x1c0, 0x140, -1, "v_mad_legacy_f32", True, True), # GFX6-GFX10
1056 (0x141, 0x141, 0x1c1, 0x1c1, 0x141, -1, "v_mad_f32", True, True),
1057 (0x142, 0x142, 0x1c2, 0x1c2, 0x142, 0x20a, "v_mad_i32_i24", False, False),
1058 (0x143, 0x143, 0x1c3, 0x1c3, 0x143, 0x20b, "v_mad_u32_u24", False, False),
1059 (0x144, 0x144, 0x1c4, 0x1c4, 0x144, 0x20c, "v_cubeid_f32", True, True),
1060 (0x145, 0x145, 0x1c5, 0x1c5, 0x145, 0x20d, "v_cubesc_f32", True, True),
1061 (0x146, 0x146, 0x1c6, 0x1c6, 0x146, 0x20e, "v_cubetc_f32", True, True),
1062 (0x147, 0x147, 0x1c7, 0x1c7, 0x147, 0x20f, "v_cubema_f32", True, True),
1063 (0x148, 0x148, 0x1c8, 0x1c8, 0x148, 0x210, "v_bfe_u32", False, False),
1064 (0x149, 0x149, 0x1c9, 0x1c9, 0x149, 0x211, "v_bfe_i32", False, False),
1065 (0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, 0x212, "v_bfi_b32", False, False),
1066 (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, 0x213, "v_fma_f32", True, True, InstrClass.ValuFma),
1067 (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, 0x214, "v_fma_f64", True, True, InstrClass.ValuDouble),
1068 (0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, 0x215, "v_lerp_u8", False, False),
1069 (0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, 0x216, "v_alignbit_b32", False, False),
1070 (0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, 0x217, "v_alignbyte_b32", False, False),
1071 (0x150, 0x150, -1, -1, 0x150, 0x218, "v_mullit_f32", True, True),
1072 (0x151, 0x151, 0x1d0, 0x1d0, 0x151, 0x219, "v_min3_f32", True, True),
1073 (0x152, 0x152, 0x1d1, 0x1d1, 0x152, 0x21a, "v_min3_i32", False, False),
1074 (0x153, 0x153, 0x1d2, 0x1d2, 0x153, 0x21b, "v_min3_u32", False, False),
1075 (0x154, 0x154, 0x1d3, 0x1d3, 0x154, 0x21c, "v_max3_f32", True, True),
1076 (0x155, 0x155, 0x1d4, 0x1d4, 0x155, 0x21d, "v_max3_i32", False, False),
1077 (0x156, 0x156, 0x1d5, 0x1d5, 0x156, 0x21e, "v_max3_u32", False, False),
1078 (0x157, 0x157, 0x1d6, 0x1d6, 0x157, 0x21f, "v_med3_f32", True, True),
1079 (0x158, 0x158, 0x1d7, 0x1d7, 0x158, 0x220, "v_med3_i32", False, False),
1080 (0x159, 0x159, 0x1d8, 0x1d8, 0x159, 0x221, "v_med3_u32", False, False),
1081 (0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, 0x222, "v_sad_u8", False, False),
1082 (0x15b, 0x15b, 0x1da, 0x1da, 0x15b, 0x223, "v_sad_hi_u8", False, False),
1083 (0x15c, 0x15c, 0x1db, 0x1db, 0x15c, 0x224, "v_sad_u16", False, False),
1084 (0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, 0x225, "v_sad_u32", False, False),
1085 (0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, 0x226, "v_cvt_pk_u8_f32", True, False),
1086 (0x15f, 0x15f, 0x1de, 0x1de, 0x15f, 0x227, "v_div_fixup_f32", True, True),
1087 (0x160, 0x160, 0x1df, 0x1df, 0x160, 0x228, "v_div_fixup_f64", True, True),
1088 (0x161, 0x161, -1, -1, -1, -1, "v_lshl_b64", False, False, InstrClass.Valu64),
1089 (0x162, 0x162, -1, -1, -1, -1, "v_lshr_b64", False, False, InstrClass.Valu64),
1090 (0x163, 0x163, -1, -1, -1, -1, "v_ashr_i64", False, False, InstrClass.Valu64),
1091 (0x164, 0x164, 0x280, 0x280, 0x164, 0x327, "v_add_f64", True, True, InstrClass.ValuDoubleAdd),
1092 (0x165, 0x165, 0x281, 0x281, 0x165, 0x328, "v_mul_f64", True, True, InstrClass.ValuDouble),
1093 (0x166, 0x166, 0x282, 0x282, 0x166, 0x329, "v_min_f64", True, True, InstrClass.ValuDouble),
1094 (0x167, 0x167, 0x283, 0x283, 0x167, 0x32a, "v_max_f64", True, True, InstrClass.ValuDouble),
1095 (0x168, 0x168, 0x284, 0x284, 0x168, 0x32b, "v_ldexp_f64", False, True, InstrClass.ValuDouble), # src1 can take input modifiers
1096 (0x169, 0x169, 0x285, 0x285, 0x169, 0x32c, "v_mul_lo_u32", False, False, InstrClass.ValuQuarterRate32),
1097 (0x16a, 0x16a, 0x286, 0x286, 0x16a, 0x32d, "v_mul_hi_u32", False, False, InstrClass.ValuQuarterRate32),
1098 (0x16b, 0x16b, 0x285, 0x285, 0x16b, 0x32c, "v_mul_lo_i32", False, False, InstrClass.ValuQuarterRate32), # identical to v_mul_lo_u32
1099 (0x16c, 0x16c, 0x287, 0x287, 0x16c, 0x32e, "v_mul_hi_i32", False, False, InstrClass.ValuQuarterRate32),
1100 (0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, 0x2fc, "v_div_scale_f32", True, True), # writes to VCC
1101 (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, 0x2fd, "v_div_scale_f64", True, True, InstrClass.ValuDouble), # writes to VCC
1102 (0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, 0x237, "v_div_fmas_f32", True, True), # takes VCC input
1103 (0x170, 0x170, 0x1e3, 0x1e3, 0x170, 0x238, "v_div_fmas_f64", True, True, InstrClass.ValuDouble), # takes VCC input
1104 (0x171, 0x171, 0x1e4, 0x1e4, 0x171, 0x239, "v_msad_u8", False, False),
1105 (0x172, 0x172, 0x1e5, 0x1e5, 0x172, 0x23a, "v_qsad_pk_u16_u8", False, False),
1106 (0x172, -1, -1, -1, -1, -1, "v_qsad_u8", False, False), # what's the difference?
1107 (0x173, 0x173, 0x1e6, 0x1e6, 0x173, 0x23b, "v_mqsad_pk_u16_u8", False, False),
1108 (0x173, -1, -1, -1, -1, -1, "v_mqsad_u8", False, False), # what's the difference?
1109 (0x174, 0x174, 0x292, 0x292, 0x174, 0x32f, "v_trig_preop_f64", False, False, InstrClass.ValuDouble),
1110 ( -1, 0x175, 0x1e7, 0x1e7, 0x175, 0x23d, "v_mqsad_u32_u8", False, False),
1111 ( -1, 0x176, 0x1e8, 0x1e8, 0x176, 0x2fe, "v_mad_u64_u32", False, False, InstrClass.Valu64),
1112 ( -1, 0x177, 0x1e9, 0x1e9, 0x177, 0x2ff, "v_mad_i64_i32", False, False, InstrClass.Valu64),
1113 ( -1, -1, 0x1ea, 0x1ea, -1, -1, "v_mad_legacy_f16", True, True),
1114 ( -1, -1, 0x1eb, 0x1eb, -1, -1, "v_mad_legacy_u16", False, False),
1115 ( -1, -1, 0x1ec, 0x1ec, -1, -1, "v_mad_legacy_i16", False, False),
1116 ( -1, -1, 0x1ed, 0x1ed, 0x344, 0x244, "v_perm_b32", False, False),
1117 ( -1, -1, 0x1ee, 0x1ee, -1, -1, "v_fma_legacy_f16", True, True, InstrClass.ValuFma),
1118 ( -1, -1, 0x1ef, 0x1ef, -1, -1, "v_div_fixup_legacy_f16", True, True),
1119 (0x12c, 0x12c, 0x1f0, 0x1f0, -1, -1, "v_cvt_pkaccum_u8_f32", True, False),
1120 ( -1, -1, -1, 0x1f1, 0x373, 0x259, "v_mad_u32_u16", False, False),
1121 ( -1, -1, -1, 0x1f2, 0x375, 0x25a, "v_mad_i32_i16", False, False),
1122 ( -1, -1, -1, 0x1f3, 0x345, 0x245, "v_xad_u32", False, False),
1123 ( -1, -1, -1, 0x1f4, 0x351, 0x249, "v_min3_f16", True, True),
1124 ( -1, -1, -1, 0x1f5, 0x352, 0x24a, "v_min3_i16", False, False),
1125 ( -1, -1, -1, 0x1f6, 0x353, 0x24b, "v_min3_u16", False, False),
1126 ( -1, -1, -1, 0x1f7, 0x354, 0x24c, "v_max3_f16", True, True),
1127 ( -1, -1, -1, 0x1f8, 0x355, 0x24d, "v_max3_i16", False, False),
1128 ( -1, -1, -1, 0x1f9, 0x356, 0x24e, "v_max3_u16", False, False),
1129 ( -1, -1, -1, 0x1fa, 0x357, 0x24f, "v_med3_f16", True, True),
1130 ( -1, -1, -1, 0x1fb, 0x358, 0x250, "v_med3_i16", False, False),
1131 ( -1, -1, -1, 0x1fc, 0x359, 0x251, "v_med3_u16", False, False),
1132 ( -1, -1, -1, 0x1fd, 0x346, 0x246, "v_lshl_add_u32", False, False),
1133 ( -1, -1, -1, 0x1fe, 0x347, 0x247, "v_add_lshl_u32", False, False),
1134 ( -1, -1, -1, 0x1ff, 0x36d, 0x255, "v_add3_u32", False, False),
1135 ( -1, -1, -1, 0x200, 0x36f, 0x256, "v_lshl_or_b32", False, False),
1136 ( -1, -1, -1, 0x201, 0x371, 0x257, "v_and_or_b32", False, False),
1137 ( -1, -1, -1, 0x202, 0x372, 0x258, "v_or3_b32", False, False),
1138 ( -1, -1, -1, 0x203, -1, -1, "v_mad_f16", True, True),
1139 ( -1, -1, -1, 0x204, 0x340, 0x241, "v_mad_u16", False, False),
1140 ( -1, -1, -1, 0x205, 0x35e, 0x253, "v_mad_i16", False, False),
1141 ( -1, -1, -1, 0x206, 0x34b, 0x248, "v_fma_f16", True, True),
1142 ( -1, -1, -1, 0x207, 0x35f, 0x254, "v_div_fixup_f16", True, True),
1143 ( -1, -1, 0x274, 0x274, 0x342, -1, "v_interp_p1ll_f16", True, True),
1144 ( -1, -1, 0x275, 0x275, 0x343, -1, "v_interp_p1lv_f16", True, True),
1145 ( -1, -1, 0x276, 0x276, -1, -1, "v_interp_p2_legacy_f16", True, True),
1146 ( -1, -1, -1, 0x277, 0x35a, -1, "v_interp_p2_f16", True, True),
1147 (0x12b, 0x12b, 0x288, 0x288, 0x362, 0x31c, "v_ldexp_f32", False, True),
1148 ( -1, -1, 0x289, 0x289, 0x360, 0x360, "v_readlane_b32_e64", False, False),
1149 ( -1, -1, 0x28a, 0x28a, 0x361, 0x361, "v_writelane_b32_e64", False, False),
1150 (0x122, 0x122, 0x28b, 0x28b, 0x364, 0x31e, "v_bcnt_u32_b32", False, False),
1151 (0x123, 0x123, 0x28c, 0x28c, 0x365, 0x31f, "v_mbcnt_lo_u32_b32", False, False),
1152 ( -1, -1, 0x28d, 0x28d, 0x366, 0x320, "v_mbcnt_hi_u32_b32_e64", False, False),
1153 ( -1, -1, 0x28f, 0x28f, 0x2ff, 0x33c, "v_lshlrev_b64", False, False, InstrClass.Valu64),
1154 ( -1, -1, 0x290, 0x290, 0x300, 0x33d, "v_lshrrev_b64", False, False, InstrClass.Valu64),
1155 ( -1, -1, 0x291, 0x291, 0x301, 0x33e, "v_ashrrev_i64", False, False, InstrClass.Valu64),
1156 (0x11e, 0x11e, 0x293, 0x293, 0x363, 0x31d, "v_bfm_b32", False, False),
1157 (0x12d, 0x12d, 0x294, 0x294, 0x368, 0x321, "v_cvt_pknorm_i16_f32", True, False),
1158 (0x12e, 0x12e, 0x295, 0x295, 0x369, 0x322, "v_cvt_pknorm_u16_f32", True, False),
1159 (0x12f, 0x12f, 0x296, 0x296, 0x12f, 0x12f, "v_cvt_pkrtz_f16_f32_e64", True, False), # GFX6_7_10_11 is VOP2 with opcode 0x02f
1160 (0x130, 0x130, 0x297, 0x297, 0x36a, 0x323, "v_cvt_pk_u16_u32", False, False),
1161 (0x131, 0x131, 0x298, 0x298, 0x36b, 0x324, "v_cvt_pk_i16_i32", False, False),
1162 ( -1, -1, -1, 0x299, 0x312, 0x312, "v_cvt_pknorm_i16_f16", True, False), #v_cvt_pk_norm_i16_f32 in GFX11
1163 ( -1, -1, -1, 0x29a, 0x313, 0x313, "v_cvt_pknorm_u16_f16", True, False), #v_cvt_pk_norm_u16_f32 in GFX11
1164 ( -1, -1, -1, 0x29c, 0x37f, 0x326, "v_add_i32", False, False),
1165 ( -1, -1, -1, 0x29d, 0x376, 0x325, "v_sub_i32", False, False),
1166 ( -1, -1, -1, 0x29e, 0x30d, 0x30d, "v_add_i16", False, False),
1167 ( -1, -1, -1, 0x29f, 0x30e, 0x30e, "v_sub_i16", False, False),
1168 ( -1, -1, -1, 0x2a0, 0x311, 0x311, "v_pack_b32_f16", True, False),
1169 ( -1, -1, -1, -1, 0x178, 0x240, "v_xor3_b32", False, False),
1170 ( -1, -1, -1, -1, 0x377, 0x25b, "v_permlane16_b32", False, False),
1171 ( -1, -1, -1, -1, 0x378, 0x25c, "v_permlanex16_b32", False, False),
1172 ( -1, -1, -1, -1, 0x30f, 0x300, "v_add_co_u32_e64", False, False),
1173 ( -1, -1, -1, -1, 0x310, 0x301, "v_sub_co_u32_e64", False, False),
1174 ( -1, -1, -1, -1, 0x319, 0x302, "v_subrev_co_u32_e64", False, False),
1175 ( -1, -1, -1, -1, 0x303, 0x303, "v_add_u16_e64", False, False),
1176 ( -1, -1, -1, -1, 0x304, 0x304, "v_sub_u16_e64", False, False),
1177 ( -1, -1, -1, -1, 0x305, 0x305, "v_mul_lo_u16_e64", False, False),
1178 ( -1, -1, -1, -1, 0x309, 0x309, "v_max_u16_e64", False, False),
1179 ( -1, -1, -1, -1, 0x30a, 0x30a, "v_max_i16_e64", False, False),
1180 ( -1, -1, -1, -1, 0x30b, 0x30b, "v_min_u16_e64", False, False),
1181 ( -1, -1, -1, -1, 0x30c, 0x30c, "v_min_i16_e64", False, False),
1182 ( -1, -1, -1, -1, 0x307, 0x339, "v_lshrrev_b16_e64", False, False),
1183 ( -1, -1, -1, -1, 0x308, 0x33a, "v_ashrrev_i16_e64", False, False),
1184 ( -1, -1, -1, -1, 0x314, 0x338, "v_lshlrev_b16_e64", False, False),
1185 ( -1, -1, -1, -1, 0x140, 0x209, "v_fma_legacy_f32", True, True, InstrClass.ValuFma), #GFX10.3+, v_fma_dx9_zero_f32 in GFX11
1186 ( -1, -1, -1, -1, -1, 0x25e, "v_maxmin_f32", True, True),
1187 ( -1, -1, -1, -1, -1, 0x25f, "v_minmax_f32", True, True),
1188 ( -1, -1, -1, -1, -1, 0x260, "v_maxmin_f16", True, True),
1189 ( -1, -1, -1, -1, -1, 0x261, "v_minmax_f16", True, True),
1190 ( -1, -1, -1, -1, -1, 0x262, "v_maxmin_u32", False, False),
1191 ( -1, -1, -1, -1, -1, 0x263, "v_minmax_u32", False, False),
1192 ( -1, -1, -1, -1, -1, 0x264, "v_maxmin_i32", False, False),
1193 ( -1, -1, -1, -1, -1, 0x265, "v_minmax_i32", False, False),
1194 ( -1, -1, -1, -1, -1, 0x266, "v_dot2_f16_f16", False, False),
1195 ( -1, -1, -1, -1, -1, 0x267, "v_dot2_bf16_bf16", False, False),
1196 ( -1, -1, -1, -1, -1, 0x306, "v_cvt_pk_i16_f32", True, False),
1197 ( -1, -1, -1, -1, -1, 0x307, "v_cvt_pk_u16_f32", True, False),
1198 ( -1, -1, -1, -1, -1, 0x362, "v_and_b16", False, False),
1199 ( -1, -1, -1, -1, -1, 0x363, "v_or_b16", False, False),
1200 ( -1, -1, -1, -1, -1, 0x364, "v_xor_b16", False, False),
1201 ( -1, -1, -1, -1, -1, 0x25d, "v_cndmask_b16", True, False),
# Pass every VOP3 row to opcode() as a Format.VOP3 instruction.
# Each row is unpacked into ten fields; only the gfx7, gfx9, gfx10 and gfx11
# opcode numbers are forwarded, so the gfx6 and gfx8 columns are read but
# unused here. in_mod / out_mod are the two booleans of each row.
# NOTE(review): default_class() is defined outside this view. The table mixes
# 9-field rows and 10-field rows that end in an explicit InstrClass, so it
# presumably appends InstrClass.Valu32 to the shorter rows -- confirm.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, cls) in default_class(VOP3, InstrClass.Valu32):
   opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP3, cls, in_mod, out_mod)
1207 # DS instructions: 3 inputs (1 addr, 2 data), 1 output
1209 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
1210 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"),
1211 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"),
1212 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"),
1213 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"),
1214 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"),
1215 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"),
1216 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"),
1217 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"),
1218 (0x09, 0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"),
1219 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"),
1220 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"),
1221 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"),
1222 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"), #ds_store_b32 in GFX11
1223 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"), #ds_store_2addr_b32 in GFX11
1224 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"), #ds_store_2addr_stride64_b32 in GFX11
1225 (0x10, 0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"), #ds_cmpstore_b32 in GFX11
1226 (0x11, 0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"), #ds_cmpstore_f32 in GFX11
1227 (0x12, 0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"),
1228 (0x13, 0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"),
1229 ( -1, 0x14, 0x14, 0x14, 0x14, 0x14, "ds_nop"),
1230 ( -1, -1, 0x15, 0x15, 0x15, 0x15, "ds_add_f32"),
1231 ( -1, -1, 0x1d, 0x1d, 0xb0, 0xb0, "ds_write_addtid_b32"), #ds_store_addtid_b32 in GFX11
1232 (0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"), #ds_store_b8 in GFX11
1233 (0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"), #ds_store_b16 in GFX11
1234 (0x20, 0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"),
1235 (0x21, 0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"),
1236 (0x22, 0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"),
1237 (0x23, 0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"),
1238 (0x24, 0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"),
1239 (0x25, 0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"),
1240 (0x26, 0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"),
1241 (0x27, 0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"),
1242 (0x28, 0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"),
1243 (0x29, 0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"),
1244 (0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"),
1245 (0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"),
1246 (0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"),
1247 (0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, "ds_wrxchg_rtn_b32"), #ds_storexchg_rtn_b32 in GFX11
1248 (0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"), #ds_storexchg_2addr_rtn_b32 in GFX11
1249 (0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"), #ds_storexchg_2addr_stride64_rtn_b32 in GFX11
1250 (0x30, 0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"), #ds_cmpstore_rtn_b32 in GFX11
1251 (0x31, 0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"), #ds_cmpstore_rtn_f32 in GFX11
1252 (0x32, 0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"),
1253 (0x33, 0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"),
1254 ( -1, 0x34, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"),
1255 ( -1, -1, 0x35, 0x35, 0x55, 0x79, "ds_add_rtn_f32"),
1256 (0x36, 0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"), #ds_load_b32 in GFX11
1257 (0x37, 0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"), #ds_load_2addr_b32 in GFX11
1258 (0x38, 0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"), #ds_load_2addr_stride64_b32 in GFX11
1259 (0x39, 0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"), #ds_load_i8 in GFX11
1260 (0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"), #ds_load_u8 in GFX11
1261 (0x3b, 0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"), #ds_load_i16 in GFX11
1262 (0x3c, 0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"), #ds_load_u16 in GFX11
1263 (0x35, 0x35, 0x3d, 0x3d, 0x35, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data2
1264 ( -1, -1, 0x3e, 0x3e, 0xb2, 0xb2, "ds_permute_b32"),
1265 ( -1, -1, 0x3f, 0x3f, 0xb3, 0xb3, "ds_bpermute_b32"),
1266 (0x40, 0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"),
1267 (0x41, 0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"),
1268 (0x42, 0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"),
1269 (0x43, 0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"),
1270 (0x44, 0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"),
1271 (0x45, 0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"),
1272 (0x46, 0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"),
1273 (0x47, 0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"),
1274 (0x48, 0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"),
1275 (0x49, 0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"),
1276 (0x4a, 0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"),
1277 (0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"),
1278 (0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"),
1279 (0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"), #ds_store_b64 in GFX11
1280 (0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"), #ds_store_2addr_b64 in GFX11
1281 (0x4f, 0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"), #ds_store_2addr_stride64_b64 in GFX11
1282 (0x50, 0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"), #ds_cmpstore_b64 in GFX11
1283 (0x51, 0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"), #ds_cmpstore_f64 in GFX11
1284 (0x52, 0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"),
1285 (0x53, 0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"),
1286 ( -1, -1, -1, 0x54, 0xa0, 0xa0, "ds_write_b8_d16_hi"), #ds_store_b8_d16_hi in GFX11
1287 ( -1, -1, -1, 0x55, 0xa1, 0xa1, "ds_write_b16_d16_hi"), #ds_store_b16_d16_hi in GFX11
1288 ( -1, -1, -1, 0x56, 0xa2, 0xa2, "ds_read_u8_d16"), #ds_load_u8_d16 in GFX11
1289 ( -1, -1, -1, 0x57, 0xa3, 0xa3, "ds_read_u8_d16_hi"), #ds_load_u8_d16_hi in GFX11
1290 ( -1, -1, -1, 0x58, 0xa4, 0xa4, "ds_read_i8_d16"), #ds_load_i8_d16 in GFX11
1291 ( -1, -1, -1, 0x59, 0xa5, 0xa5, "ds_read_i8_d16_hi"), #ds_load_i8_d16_hi in GFX11
1292 ( -1, -1, -1, 0x5a, 0xa6, 0xa6, "ds_read_u16_d16"), #ds_load_u16_d16 in GFX11
1293 ( -1, -1, -1, 0x5b, 0xa7, 0xa7, "ds_read_u16_d16_hi"), #ds_load_u16_d16_hi in GFX11
1294 (0x60, 0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"),
1295 (0x61, 0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"),
1296 (0x62, 0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"),
1297 (0x63, 0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"),
1298 (0x64, 0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"),
1299 (0x65, 0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"),
1300 (0x66, 0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"),
1301 (0x67, 0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"),
1302 (0x68, 0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"),
1303 (0x69, 0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"),
1304 (0x6a, 0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"),
1305 (0x6b, 0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"),
1306 (0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"),
1307 (0x6d, 0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"), #ds_storexchg_rtn_b64 in GFX11
1308 (0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"), #ds_storexchg_2addr_rtn_b64 in GFX11
1309 (0x6f, 0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"), #ds_storexchg_2addr_stride64_rtn_b64 in GFX11
1310 (0x70, 0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"), #ds_cmpstore_rtn_b64 in GFX11
1311 (0x71, 0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"), #ds_cmpstore_rtn_f64 in GFX11
1312 (0x72, 0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"),
1313 (0x73, 0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"),
1314 (0x76, 0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"), #ds_load_b64 in GFX11
1315 (0x77, 0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"), #ds_load_2addr_b64 in GFX11
1316 (0x78, 0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"), #ds_load_2addr_stride64_b64 in GFX11
1317 ( -1, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"),
1318 (0x80, 0x80, 0x80, 0x80, 0x80, -1, "ds_add_src2_u32"),
1319 (0x81, 0x81, 0x81, 0x81, 0x81, -1, "ds_sub_src2_u32"),
1320 (0x82, 0x82, 0x82, 0x82, 0x82, -1, "ds_rsub_src2_u32"),
1321 (0x83, 0x83, 0x83, 0x83, 0x83, -1, "ds_inc_src2_u32"),
1322 (0x84, 0x84, 0x84, 0x84, 0x84, -1, "ds_dec_src2_u32"),
1323 (0x85, 0x85, 0x85, 0x85, 0x85, -1, "ds_min_src2_i32"),
1324 (0x86, 0x86, 0x86, 0x86, 0x86, -1, "ds_max_src2_i32"),
1325 (0x87, 0x87, 0x87, 0x87, 0x87, -1, "ds_min_src2_u32"),
1326 (0x88, 0x88, 0x88, 0x88, 0x88, -1, "ds_max_src2_u32"),
1327 (0x89, 0x89, 0x89, 0x89, 0x89, -1, "ds_and_src2_b32"),
1328 (0x8a, 0x8a, 0x8a, 0x8a, 0x8a, -1, "ds_or_src2_b32"),
1329 (0x8b, 0x8b, 0x8b, 0x8b, 0x8b, -1, "ds_xor_src2_b32"),
1330 (0x8d, 0x8d, 0x8d, 0x8d, 0x8d, -1, "ds_write_src2_b32"),
1331 (0x92, 0x92, 0x92, 0x92, 0x92, -1, "ds_min_src2_f32"),
1332 (0x93, 0x93, 0x93, 0x93, 0x93, -1, "ds_max_src2_f32"),
1333 ( -1, -1, 0x95, 0x95, 0x95, -1, "ds_add_src2_f32"),
1334 ( -1, 0x18, 0x98, 0x98, 0x18, 0x18, "ds_gws_sema_release_all"),
1335 (0x19, 0x19, 0x99, 0x99, 0x19, 0x19, "ds_gws_init"),
1336 (0x1a, 0x1a, 0x9a, 0x9a, 0x1a, 0x1a, "ds_gws_sema_v"),
1337 (0x1b, 0x1b, 0x9b, 0x9b, 0x1b, 0x1b, "ds_gws_sema_br"),
1338 (0x1c, 0x1c, 0x9c, 0x9c, 0x1c, 0x1c, "ds_gws_sema_p"),
1339 (0x1d, 0x1d, 0x9d, 0x9d, 0x1d, 0x1d, "ds_gws_barrier"),
1340 ( -1, -1, 0xb6, 0xb6, 0xb1, 0xb1, "ds_read_addtid_b32"), #ds_load_addtid_b32 in GFX11
1341 (0x3d, 0x3d, 0xbd, 0xbd, 0x3d, 0x3d, "ds_consume"),
1342 (0x3e, 0x3e, 0xbe, 0xbe, 0x3e, 0x3e, "ds_append"),
1343 (0x3f, 0x3f, 0xbf, 0xbf, 0x3f, 0x3f, "ds_ordered_count"),
1344 (0xc0, 0xc0, 0xc0, 0xc0, 0xc0, -1, "ds_add_src2_u64"),
1345 (0xc1, 0xc1, 0xc1, 0xc1, 0xc1, -1, "ds_sub_src2_u64"),
1346 (0xc2, 0xc2, 0xc2, 0xc2, 0xc2, -1, "ds_rsub_src2_u64"),
1347 (0xc3, 0xc3, 0xc3, 0xc3, 0xc3, -1, "ds_inc_src2_u64"),
1348 (0xc4, 0xc4, 0xc4, 0xc4, 0xc4, -1, "ds_dec_src2_u64"),
1349 (0xc5, 0xc5, 0xc5, 0xc5, 0xc5, -1, "ds_min_src2_i64"),
1350 (0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -1, "ds_max_src2_i64"),
1351 (0xc7, 0xc7, 0xc7, 0xc7, 0xc7, -1, "ds_min_src2_u64"),
1352 (0xc8, 0xc8, 0xc8, 0xc8, 0xc8, -1, "ds_max_src2_u64"),
1353 (0xc9, 0xc9, 0xc9, 0xc9, 0xc9, -1, "ds_and_src2_b64"),
1354 (0xca, 0xca, 0xca, 0xca, 0xca, -1, "ds_or_src2_b64"),
1355 (0xcb, 0xcb, 0xcb, 0xcb, 0xcb, -1, "ds_xor_src2_b64"),
1356 (0xcd, 0xcd, 0xcd, 0xcd, 0xcd, -1, "ds_write_src2_b64"),
1357 (0xd2, 0xd2, 0xd2, 0xd2, 0xd2, -1, "ds_min_src2_f64"),
1358 (0xd3, 0xd3, 0xd3, 0xd3, 0xd3, -1, "ds_max_src2_f64"),
1359 ( -1, 0xde, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"), #ds_store_b96 in GFX11
1360 ( -1, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, "ds_write_b128"), #ds_store_b128 in GFX11
1361 ( -1, 0xfd, 0xfd, -1, -1, -1, "ds_condxchg32_rtn_b128"),
1362 ( -1, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"), #ds_load_b96 in GFX11
1363 ( -1, 0xff, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"), #ds_load_b128 in GFX11
1364 ( -1, -1, -1, -1, -1, 0x7a, "ds_add_gs_reg_rtn"),
1365 ( -1, -1, -1, -1, -1, 0x7b, "ds_sub_gs_reg_rtn"),
# Pass every DS row to opcode() as a Format.DS / InstrClass.DS instruction.
# Rows carry one opcode number per generation (gfx6..gfx11) followed by the
# mnemonic; the gfx6 and gfx8 columns are unpacked but not forwarded.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in DS:
   opcode(name, gfx7, gfx9, gfx10, gfx11, Format.DS, InstrClass.DS)
1371 # LDSDIR instructions:
1373 (0x00, "lds_param_load"),
1374 (0x01, "lds_direct_load"),
# LDSDIR rows hold a single opcode number and the mnemonic. The number is
# passed as the fourth numeric argument of opcode(); the three before it
# are -1.
# NOTE(review): the six-column loops in this file pass their gfx11 column in
# that position, and -1 appears to mean "no encoding" -- confirm against the
# definition of opcode().
for (code, name) in LDSDIR:
   opcode(name, -1, -1, -1, code, Format.LDSDIR, InstrClass.DS)
1379 # MUBUF instructions:
1381 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"),
1382 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"),
1383 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"),
1384 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"),
1385 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"),
1386 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"),
1387 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"),
1388 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"),
1389 ( -1, -1, 0x08, 0x08, 0x80, 0x08, "buffer_load_format_d16_x"),
1390 ( -1, -1, 0x09, 0x09, 0x81, 0x09, "buffer_load_format_d16_xy"),
1391 ( -1, -1, 0x0a, 0x0a, 0x82, 0x0a, "buffer_load_format_d16_xyz"),
1392 ( -1, -1, 0x0b, 0x0b, 0x83, 0x0b, "buffer_load_format_d16_xyzw"),
1393 ( -1, -1, 0x0c, 0x0c, 0x84, 0x0c, "buffer_store_format_d16_x"),
1394 ( -1, -1, 0x0d, 0x0d, 0x85, 0x0d, "buffer_store_format_d16_xy"),
1395 ( -1, -1, 0x0e, 0x0e, 0x86, 0x0e, "buffer_store_format_d16_xyz"),
1396 ( -1, -1, 0x0f, 0x0f, 0x87, 0x0f, "buffer_store_format_d16_xyzw"),
1397 (0x08, 0x08, 0x10, 0x10, 0x08, 0x10, "buffer_load_ubyte"),
1398 (0x09, 0x09, 0x11, 0x11, 0x09, 0x11, "buffer_load_sbyte"),
1399 (0x0a, 0x0a, 0x12, 0x12, 0x0a, 0x12, "buffer_load_ushort"),
1400 (0x0b, 0x0b, 0x13, 0x13, 0x0b, 0x13, "buffer_load_sshort"),
1401 (0x0c, 0x0c, 0x14, 0x14, 0x0c, 0x14, "buffer_load_dword"),
1402 (0x0d, 0x0d, 0x15, 0x15, 0x0d, 0x15, "buffer_load_dwordx2"),
1403 ( -1, 0x0f, 0x16, 0x16, 0x0f, 0x16, "buffer_load_dwordx3"),
1404 (0x0f, 0x0e, 0x17, 0x17, 0x0e, 0x17, "buffer_load_dwordx4"),
1405 (0x18, 0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"),
1406 ( -1, -1, -1, 0x19, 0x19, 0x24, "buffer_store_byte_d16_hi"),
1407 (0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x19, "buffer_store_short"),
1408 ( -1, -1, -1, 0x1b, 0x1b, 0x25, "buffer_store_short_d16_hi"),
1409 (0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1a, "buffer_store_dword"),
1410 (0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1b, "buffer_store_dwordx2"),
1411 ( -1, 0x1f, 0x1e, 0x1e, 0x1f, 0x1c, "buffer_store_dwordx3"),
1412 (0x1e, 0x1e, 0x1f, 0x1f, 0x1e, 0x1d, "buffer_store_dwordx4"),
1413 ( -1, -1, -1, 0x20, 0x20, 0x1e, "buffer_load_ubyte_d16"),
1414 ( -1, -1, -1, 0x21, 0x21, 0x21, "buffer_load_ubyte_d16_hi"),
1415 ( -1, -1, -1, 0x22, 0x22, 0x1f, "buffer_load_sbyte_d16"),
1416 ( -1, -1, -1, 0x23, 0x23, 0x22, "buffer_load_sbyte_d16_hi"),
1417 ( -1, -1, -1, 0x24, 0x24, 0x20, "buffer_load_short_d16"),
1418 ( -1, -1, -1, 0x25, 0x25, 0x23, "buffer_load_short_d16_hi"),
1419 ( -1, -1, -1, 0x26, 0x26, 0x26, "buffer_load_format_d16_hi_x"),
1420 ( -1, -1, -1, 0x27, 0x27, 0x27, "buffer_store_format_d16_hi_x"),
1421 ( -1, -1, 0x3d, 0x3d, -1, -1, "buffer_store_lds_dword"),
1422 (0x71, 0x71, 0x3e, 0x3e, -1, -1, "buffer_wbinvl1"),
1423 (0x70, 0x70, 0x3f, 0x3f, -1, -1, "buffer_wbinvl1_vol"),
1424 (0x30, 0x30, 0x40, 0x40, 0x30, 0x33, "buffer_atomic_swap"),
1425 (0x31, 0x31, 0x41, 0x41, 0x31, 0x34, "buffer_atomic_cmpswap"),
1426 (0x32, 0x32, 0x42, 0x42, 0x32, 0x35, "buffer_atomic_add"),
1427 (0x33, 0x33, 0x43, 0x43, 0x33, 0x36, "buffer_atomic_sub"),
1428 (0x34, -1, -1, -1, -1, -1, "buffer_atomic_rsub"),
1429 (0x35, 0x35, 0x44, 0x44, 0x35, 0x38, "buffer_atomic_smin"),
1430 (0x36, 0x36, 0x45, 0x45, 0x36, 0x39, "buffer_atomic_umin"),
1431 (0x37, 0x37, 0x46, 0x46, 0x37, 0x3a, "buffer_atomic_smax"),
1432 (0x38, 0x38, 0x47, 0x47, 0x38, 0x3b, "buffer_atomic_umax"),
1433 (0x39, 0x39, 0x48, 0x48, 0x39, 0x3c, "buffer_atomic_and"),
1434 (0x3a, 0x3a, 0x49, 0x49, 0x3a, 0x3d, "buffer_atomic_or"),
1435 (0x3b, 0x3b, 0x4a, 0x4a, 0x3b, 0x3e, "buffer_atomic_xor"),
1436 (0x3c, 0x3c, 0x4b, 0x4b, 0x3c, 0x3f, "buffer_atomic_inc"),
1437 (0x3d, 0x3d, 0x4c, 0x4c, 0x3d, 0x40, "buffer_atomic_dec"),
1438 (0x3e, 0x3e, -1, -1, 0x3e, 0x50, "buffer_atomic_fcmpswap"),
1439 (0x3f, 0x3f, -1, -1, 0x3f, 0x51, "buffer_atomic_fmin"),
1440 (0x40, 0x40, -1, -1, 0x40, 0x52, "buffer_atomic_fmax"),
1441 (0x50, 0x50, 0x60, 0x60, 0x50, 0x41, "buffer_atomic_swap_x2"),
1442 (0x51, 0x51, 0x61, 0x61, 0x51, 0x42, "buffer_atomic_cmpswap_x2"),
1443 (0x52, 0x52, 0x62, 0x62, 0x52, 0x43, "buffer_atomic_add_x2"),
1444 (0x53, 0x53, 0x63, 0x63, 0x53, 0x44, "buffer_atomic_sub_x2"),
1445 (0x54, -1, -1, -1, -1, -1, "buffer_atomic_rsub_x2"),
1446 (0x55, 0x55, 0x64, 0x64, 0x55, 0x45, "buffer_atomic_smin_x2"),
1447 (0x56, 0x56, 0x65, 0x65, 0x56, 0x46, "buffer_atomic_umin_x2"),
1448 (0x57, 0x57, 0x66, 0x66, 0x57, 0x47, "buffer_atomic_smax_x2"),
1449 (0x58, 0x58, 0x67, 0x67, 0x58, 0x48, "buffer_atomic_umax_x2"),
1450 (0x59, 0x59, 0x68, 0x68, 0x59, 0x49, "buffer_atomic_and_x2"),
1451 (0x5a, 0x5a, 0x69, 0x69, 0x5a, 0x4a, "buffer_atomic_or_x2"),
1452 (0x5b, 0x5b, 0x6a, 0x6a, 0x5b, 0x4b, "buffer_atomic_xor_x2"),
1453 (0x5c, 0x5c, 0x6b, 0x6b, 0x5c, 0x4c, "buffer_atomic_inc_x2"),
1454 (0x5d, 0x5d, 0x6c, 0x6c, 0x5d, 0x4d, "buffer_atomic_dec_x2"),
1455 (0x5e, 0x5e, -1, -1, 0x5e, -1, "buffer_atomic_fcmpswap_x2"),
1456 (0x5f, 0x5f, -1, -1, 0x5f, -1, "buffer_atomic_fmin_x2"),
1457 (0x60, 0x60, -1, -1, 0x60, -1, "buffer_atomic_fmax_x2"),
1458 ( -1, -1, -1, -1, 0x71, 0x2b, "buffer_gl0_inv"),
1459 ( -1, -1, -1, -1, 0x72, 0x2c, "buffer_gl1_inv"),
1460 ( -1, -1, -1, -1, 0x34, 0x37, "buffer_atomic_csub"), #GFX10.3+. seems glc must be set. buffer_atomic_csub_u32 in GFX11
1461 ( -1, -1, -1, -1, -1, 0x31, "buffer_load_lds_b32"),
1462 ( -1, -1, -1, -1, -1, 0x32, "buffer_load_lds_format_x"),
1463 ( -1, -1, -1, -1, -1, 0x2e, "buffer_load_lds_i8"),
1464 ( -1, -1, -1, -1, -1, 0x30, "buffer_load_lds_i16"),
1465 ( -1, -1, -1, -1, -1, 0x2d, "buffer_load_lds_u8"),
1466 ( -1, -1, -1, -1, -1, 0x2f, "buffer_load_lds_u16"),
1467 ( -1, -1, -1, -1, -1, 0x56, "buffer_atomic_add_f32"),
# Pass every MUBUF row to opcode() as a Format.MUBUF / InstrClass.VMem
# instruction. The gfx6 and gfx8 columns are unpacked but not forwarded.
# is_atomic is derived from the mnemonic: it is True exactly when the name
# contains the substring "atomic".
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in MUBUF:
   opcode(name, gfx7, gfx9, gfx10, gfx11, Format.MUBUF, InstrClass.VMem, is_atomic = "atomic" in name)
1473 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
1474 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"),
1475 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"),
1476 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"),
1477 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"),
1478 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"),
1479 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"),
1480 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"),
1481 ( -1, -1, 0x08, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"),
1482 ( -1, -1, 0x09, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"),
1483 ( -1, -1, 0x0a, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"),
1484 ( -1, -1, 0x0b, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"),
1485 ( -1, -1, 0x0c, 0x0c, 0x0c, 0x0c, "tbuffer_store_format_d16_x"),
1486 ( -1, -1, 0x0d, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"),
1487 ( -1, -1, 0x0e, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"),
1488 ( -1, -1, 0x0f, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"),
# Pass every MTBUF row to opcode() as a Format.MTBUF / InstrClass.VMem
# instruction. The gfx6 and gfx8 columns are unpacked but not forwarded.
# Unlike the MUBUF loop, no is_atomic argument is passed here.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in MTBUF:
   opcode(name, gfx7, gfx9, gfx10, gfx11, Format.MTBUF, InstrClass.VMem)
1495 (0x00, 0x00, "image_load"),
1496 (0x01, 0x01, "image_load_mip"),
1497 (0x02, 0x02, "image_load_pck"),
1498 (0x03, 0x03, "image_load_pck_sgn"),
1499 (0x04, 0x04, "image_load_mip_pck"),
1500 (0x05, 0x05, "image_load_mip_pck_sgn"),
1501 (0x08, 0x06, "image_store"),
1502 (0x09, 0x07, "image_store_mip"),
1503 (0x0a, 0x08, "image_store_pck"),
1504 (0x0b, 0x09, "image_store_mip_pck"),
1505 (0x0e, 0x17, "image_get_resinfo"),
1506 (0x60, 0x38, "image_get_lod"),
# IMAGE rows are (code, gfx11, name): "code" is reused for the first three
# opcode-number arguments of opcode(), and only gfx11 has its own number.
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, gfx11, name)
for (code, gfx11, name) in IMAGE:
   opcode(name, code, code, code, gfx11, Format.MIMG, InstrClass.VMem)
# image_msaa_load is passed to opcode() on its own rather than through a
# table: -1 for the first two opcode-number arguments, 0x80 and 0x18 for the
# last two.
# NOTE(review): by analogy with the table loops in this file those four
# positions are (gfx7, gfx9, gfx10, gfx11) -- confirm against opcode().
opcode("image_msaa_load", -1, -1, 0x80, 0x18, Format.MIMG, InstrClass.VMem) #GFX10.3+
1515 (0x0f, 0x0f, 0x10, 0x0a, "image_atomic_swap"),
1516 (0x10, 0x10, 0x11, 0x0b, "image_atomic_cmpswap"),
1517 (0x11, 0x11, 0x12, 0x0c, "image_atomic_add"),
1518 (0x12, 0x12, 0x13, 0x0d, "image_atomic_sub"),
1519 (0x13, -1, -1, -1, "image_atomic_rsub"),
1520 (0x14, 0x14, 0x14, 0x0e, "image_atomic_smin"),
1521 (0x15, 0x15, 0x15, 0x0f, "image_atomic_umin"),
1522 (0x16, 0x16, 0x16, 0x10, "image_atomic_smax"),
1523 (0x17, 0x17, 0x17, 0x11, "image_atomic_umax"),
1524 (0x18, 0x18, 0x18, 0x12, "image_atomic_and"),
1525 (0x19, 0x19, 0x19, 0x13, "image_atomic_or"),
1526 (0x1a, 0x1a, 0x1a, 0x14, "image_atomic_xor"),
1527 (0x1b, 0x1b, 0x1b, 0x15, "image_atomic_inc"),
1528 (0x1c, 0x1c, 0x1c, 0x16, "image_atomic_dec"),
1529 (0x1d, 0x1d, -1, -1, "image_atomic_fcmpswap"),
1530 (0x1e, 0x1e, -1, -1, "image_atomic_fmin"),
1531 (0x1f, 0x1f, -1, -1, "image_atomic_fmax"),
# IMAGE_ATOMIC rows are (gfx6, gfx7, gfx89, gfx11, name). There is no separate
# gfx10 column: the gfx7 number is passed twice (first and third opcode-number
# arguments) and gfx89 is passed as the second. gfx6 is unpacked but not
# forwarded. Every entry is passed with is_atomic = True.
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (gfx6, gfx7, gfx89, gfx89, gfx7, gfx11, name)
# gfx7 and gfx10 opcodes are the same here
for (gfx6, gfx7, gfx89, gfx11, name) in IMAGE_ATOMIC:
   opcode(name, gfx7, gfx89, gfx7, gfx11, Format.MIMG, InstrClass.VMem, is_atomic = True)
1539 (0x20, 0x1b, "image_sample"),
1540 (0x21, 0x40, "image_sample_cl"),
1541 (0x22, 0x1c, "image_sample_d"),
1542 (0x23, 0x41, "image_sample_d_cl"),
1543 (0x24, 0x1d, "image_sample_l"),
1544 (0x25, 0x1e, "image_sample_b"),
1545 (0x26, 0x42, "image_sample_b_cl"),
1546 (0x27, 0x1f, "image_sample_lz"),
1547 (0x28, 0x20, "image_sample_c"),
1548 (0x29, 0x43, "image_sample_c_cl"),
1549 (0x2a, 0x21, "image_sample_c_d"),
1550 (0x2b, 0x44, "image_sample_c_d_cl"),
1551 (0x2c, 0x22, "image_sample_c_l"),
1552 (0x2d, 0x23, "image_sample_c_b"),
1553 (0x2e, 0x45, "image_sample_c_b_cl"),
1554 (0x2f, 0x24, "image_sample_c_lz"),
1555 (0x30, 0x25, "image_sample_o"),
1556 (0x31, 0x46, "image_sample_cl_o"),
1557 (0x32, 0x26, "image_sample_d_o"),
1558 (0x33, 0x47, "image_sample_d_cl_o"),
1559 (0x34, 0x27, "image_sample_l_o"),
1560 (0x35, 0x28, "image_sample_b_o"),
1561 (0x36, 0x48, "image_sample_b_cl_o"),
1562 (0x37, 0x29, "image_sample_lz_o"),
1563 (0x38, 0x2a, "image_sample_c_o"),
1564 (0x39, 0x49, "image_sample_c_cl_o"),
1565 (0x3a, 0x2b, "image_sample_c_d_o"),
1566 (0x3b, 0x4a, "image_sample_c_d_cl_o"),
1567 (0x3c, 0x2c, "image_sample_c_l_o"),
1568 (0x3d, 0x2d, "image_sample_c_b_o"),
1569 (0x3e, 0x4b, "image_sample_c_b_cl_o"),
1570 (0x3f, 0x2e, "image_sample_c_lz_o"),
1571 (0x68, -1, "image_sample_cd"),
1572 (0x69, -1, "image_sample_cd_cl"),
1573 (0x6a, -1, "image_sample_c_cd"),
1574 (0x6b, -1, "image_sample_c_cd_cl"),
1575 (0x6c, -1, "image_sample_cd_o"),
1576 (0x6d, -1, "image_sample_cd_cl_o"),
1577 (0x6e, -1, "image_sample_c_cd_o"),
1578 (0x6f, -1, "image_sample_c_cd_cl_o"),
# IMAGE_SAMPLE rows are (code, gfx11, name): "code" is reused for the first
# three opcode-number arguments of opcode(), gfx11 is passed as the fourth.
# Rows whose gfx11 column is -1 are still passed through unchanged.
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, gfx11, name)
for (code, gfx11, name) in IMAGE_SAMPLE:
   opcode(name, code, code, code, gfx11, Format.MIMG, InstrClass.VMem)
1584 IMAGE_SAMPLE_G16 = {
1585 (0xa2, 0x39, "image_sample_d_g16"),
1586 (0xa3, 0x5f, "image_sample_d_cl_g16"),
1587 (0xaa, 0x3a, "image_sample_c_d_g16"),
1588 (0xab, 0x54, "image_sample_c_d_cl_g16"),
1589 (0xb2, 0x3b, "image_sample_d_o_g16"),
1590 (0xb3, 0x55, "image_sample_d_cl_o_g16"),
1591 (0xba, 0x3c, "image_sample_c_d_o_g16"),
1592 (0xbb, 0x56, "image_sample_c_d_cl_o_g16"),
# IMAGE_SAMPLE_G16 rows are (code, gfx11, name). The first two opcode-number
# arguments of opcode() are -1; "code" is passed as the third and gfx11 as
# the fourth.
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, -1, -1, code, gfx11, name)
for (code, gfx11, name) in IMAGE_SAMPLE_G16:
   opcode(name, -1, -1, code, gfx11, Format.MIMG, InstrClass.VMem)
1600 (0x40, 0x2f, "image_gather4"),
1601 (0x41, 0x60, "image_gather4_cl"),
1602 #(0x42, "image_gather4h"), VEGA only?
1603 (0x44, 0x30, "image_gather4_l"), # following instructions have different opcodes according to ISA sheet.
1604 (0x45, 0x31, "image_gather4_b"),
1605 (0x46, 0x61, "image_gather4_b_cl"),
1606 (0x47, 0x32, "image_gather4_lz"),
1607 (0x48, 0x33, "image_gather4_c"),
1608 (0x49, 0x62, "image_gather4_c_cl"), # previous instructions have different opcodes according to ISA sheet.
1609 #(0x4a, "image_gather4h_pck"), VEGA only?
#(0x4b, "image_gather8h_pck"), VEGA only?
1611 (0x4c, 0x63, "image_gather4_c_l"),
1612 (0x4d, 0x64, "image_gather4_c_b"),
1613 (0x4e, 0x65, "image_gather4_c_b_cl"),
1614 (0x4f, 0x34, "image_gather4_c_lz"),
1615 (0x50, 0x35, "image_gather4_o"),
1616 (0x51, -1, "image_gather4_cl_o"),
1617 (0x54, -1, "image_gather4_l_o"),
1618 (0x55, -1, "image_gather4_b_o"),
1619 (0x56, -1, "image_gather4_b_cl_o"),
1620 (0x57, 0x36, "image_gather4_lz_o"),
1621 (0x58, -1, "image_gather4_c_o"),
1622 (0x59, -1, "image_gather4_c_cl_o"),
1623 (0x5c, -1, "image_gather4_c_l_o"),
1624 (0x5d, -1, "image_gather4_c_b_o"),
1625 (0x5e, -1, "image_gather4_c_b_cl_o"),
1626 (0x5f, 0x37, "image_gather4_c_lz_o"),
# IMAGE_GATHER4 rows are (code, gfx11, name): "code" is reused for the first
# three opcode-number arguments of opcode(), gfx11 is passed as the fourth.
# Rows whose gfx11 column is -1 are still passed through unchanged.
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, gfx11, name)
for (code, gfx11, name) in IMAGE_GATHER4:
   opcode(name, code, code, code, gfx11, Format.MIMG, InstrClass.VMem)
# The two BVH ray-intersection instructions are passed to opcode() directly:
# -1 for the first two opcode-number arguments, explicit numbers for the
# last two.
# NOTE(review): by analogy with the table loops in this file those four
# positions are (gfx7, gfx9, gfx10, gfx11) -- confirm against opcode().
opcode("image_bvh_intersect_ray", -1, -1, 0xe6, 0x19, Format.MIMG, InstrClass.VMem)
opcode("image_bvh64_intersect_ray", -1, -1, 0xe7, 0x1a, Format.MIMG, InstrClass.VMem)
# columns: GFX7, GFX8/9, GFX10, GFX11, name
1637 (0x08, 0x10, 0x08, 0x10, "flat_load_ubyte"),
1638 (0x09, 0x11, 0x09, 0x11, "flat_load_sbyte"),
1639 (0x0a, 0x12, 0x0a, 0x12, "flat_load_ushort"),
1640 (0x0b, 0x13, 0x0b, 0x13, "flat_load_sshort"),
1641 (0x0c, 0x14, 0x0c, 0x14, "flat_load_dword"),
1642 (0x0d, 0x15, 0x0d, 0x15, "flat_load_dwordx2"),
1643 (0x0f, 0x16, 0x0f, 0x16, "flat_load_dwordx3"),
1644 (0x0e, 0x17, 0x0e, 0x17, "flat_load_dwordx4"),
1645 (0x18, 0x18, 0x18, 0x18, "flat_store_byte"),
1646 ( -1, 0x19, 0x19, 0x24, "flat_store_byte_d16_hi"),
1647 (0x1a, 0x1a, 0x1a, 0x19, "flat_store_short"),
1648 ( -1, 0x1b, 0x1b, 0x25, "flat_store_short_d16_hi"),
1649 (0x1c, 0x1c, 0x1c, 0x1a, "flat_store_dword"),
1650 (0x1d, 0x1d, 0x1d, 0x1b, "flat_store_dwordx2"),
1651 (0x1f, 0x1e, 0x1f, 0x1c, "flat_store_dwordx3"),
1652 (0x1e, 0x1f, 0x1e, 0x1d, "flat_store_dwordx4"),
1653 ( -1, 0x20, 0x20, 0x1e, "flat_load_ubyte_d16"),
1654 ( -1, 0x21, 0x21, 0x21, "flat_load_ubyte_d16_hi"),
1655 ( -1, 0x22, 0x22, 0x1f, "flat_load_sbyte_d16"),
1656 ( -1, 0x23, 0x23, 0x22, "flat_load_sbyte_d16_hi"),
1657 ( -1, 0x24, 0x24, 0x20, "flat_load_short_d16"),
1658 ( -1, 0x25, 0x25, 0x23, "flat_load_short_d16_hi"),
1659 (0x30, 0x40, 0x30, 0x33, "flat_atomic_swap"),
1660 (0x31, 0x41, 0x31, 0x34, "flat_atomic_cmpswap"),
1661 (0x32, 0x42, 0x32, 0x35, "flat_atomic_add"),
1662 (0x33, 0x43, 0x33, 0x36, "flat_atomic_sub"),
1663 (0x35, 0x44, 0x35, 0x38, "flat_atomic_smin"),
1664 (0x36, 0x45, 0x36, 0x39, "flat_atomic_umin"),
1665 (0x37, 0x46, 0x37, 0x3a, "flat_atomic_smax"),
1666 (0x38, 0x47, 0x38, 0x3b, "flat_atomic_umax"),
1667 (0x39, 0x48, 0x39, 0x3c, "flat_atomic_and"),
1668 (0x3a, 0x49, 0x3a, 0x3d, "flat_atomic_or"),
1669 (0x3b, 0x4a, 0x3b, 0x3e, "flat_atomic_xor"),
1670 (0x3c, 0x4b, 0x3c, 0x3f, "flat_atomic_inc"),
1671 (0x3d, 0x4c, 0x3d, 0x40, "flat_atomic_dec"),
1672 (0x3e, -1, 0x3e, 0x50, "flat_atomic_fcmpswap"),
1673 (0x3f, -1, 0x3f, 0x51, "flat_atomic_fmin"),
1674 (0x40, -1, 0x40, 0x52, "flat_atomic_fmax"),
1675 (0x50, 0x60, 0x50, 0x41, "flat_atomic_swap_x2"),
1676 (0x51, 0x61, 0x51, 0x42, "flat_atomic_cmpswap_x2"),
1677 (0x52, 0x62, 0x52, 0x43, "flat_atomic_add_x2"),
1678 (0x53, 0x63, 0x53, 0x44, "flat_atomic_sub_x2"),
1679 (0x55, 0x64, 0x55, 0x45, "flat_atomic_smin_x2"),
1680 (0x56, 0x65, 0x56, 0x46, "flat_atomic_umin_x2"),
1681 (0x57, 0x66, 0x57, 0x47, "flat_atomic_smax_x2"),
1682 (0x58, 0x67, 0x58, 0x48, "flat_atomic_umax_x2"),
1683 (0x59, 0x68, 0x59, 0x49, "flat_atomic_and_x2"),
1684 (0x5a, 0x69, 0x5a, 0x4a, "flat_atomic_or_x2"),
1685 (0x5b, 0x6a, 0x5b, 0x4b, "flat_atomic_xor_x2"),
1686 (0x5c, 0x6b, 0x5c, 0x4c, "flat_atomic_inc_x2"),
1687 (0x5d, 0x6c, 0x5d, 0x4d, "flat_atomic_dec_x2"),
1688 (0x5e, -1, 0x5e, -1, "flat_atomic_fcmpswap_x2"),
1689 (0x5f, -1, 0x5f, -1, "flat_atomic_fmin_x2"),
1690 (0x60, -1, 0x60, -1, "flat_atomic_fmax_x2"),
1691 ( -1, -1, -1, 0x56, "flat_atomic_add_f32"),
# FLAT rows are (gfx7, gfx8, gfx10, gfx11, name); all four numbers are
# forwarded to opcode() in that order. The gfx8 column is passed in the same
# argument position where the six-column tables pass gfx9.
# is_atomic is True exactly when the mnemonic contains the substring "atomic".
for (gfx7, gfx8, gfx10, gfx11, name) in FLAT:
   opcode(name, gfx7, gfx8, gfx10, gfx11, Format.FLAT, InstrClass.VMem, is_atomic = "atomic" in name) #TODO: also LDS?
# GLOBAL instruction table.
#
# Each row is (gfx8, gfx10, gfx11, name) -- the order in which the
# registration loop that follows unpacks it.  A column holds -1 where no
# number is listed for that generation (presumably "not available on this
# generation" -- confirm against opcode()).
#
# NOTE(review): the rows are bound to an ordered tuple so that registration
# order follows the listing; consumers only need to iterate over it.
GLOBAL = (
    (0x10, 0x08, 0x10, "global_load_ubyte"),
    (0x11, 0x09, 0x11, "global_load_sbyte"),
    (0x12, 0x0a, 0x12, "global_load_ushort"),
    (0x13, 0x0b, 0x13, "global_load_sshort"),
    (0x14, 0x0c, 0x14, "global_load_dword"),
    (0x15, 0x0d, 0x15, "global_load_dwordx2"),
    (0x16, 0x0f, 0x16, "global_load_dwordx3"),
    (0x17, 0x0e, 0x17, "global_load_dwordx4"),
    (0x18, 0x18, 0x18, "global_store_byte"),
    (0x19, 0x19, 0x24, "global_store_byte_d16_hi"),
    (0x1a, 0x1a, 0x19, "global_store_short"),
    (0x1b, 0x1b, 0x25, "global_store_short_d16_hi"),
    (0x1c, 0x1c, 0x1a, "global_store_dword"),
    (0x1d, 0x1d, 0x1b, "global_store_dwordx2"),
    (0x1e, 0x1f, 0x1c, "global_store_dwordx3"),
    (0x1f, 0x1e, 0x1d, "global_store_dwordx4"),
    (0x20, 0x20, 0x1e, "global_load_ubyte_d16"),
    (0x21, 0x21, 0x21, "global_load_ubyte_d16_hi"),
    (0x22, 0x22, 0x1f, "global_load_sbyte_d16"),
    (0x23, 0x23, 0x22, "global_load_sbyte_d16_hi"),
    (0x24, 0x24, 0x20, "global_load_short_d16"),
    (0x25, 0x25, 0x23, "global_load_short_d16_hi"),
    (0x40, 0x30, 0x33, "global_atomic_swap"),
    (0x41, 0x31, 0x34, "global_atomic_cmpswap"),
    (0x42, 0x32, 0x35, "global_atomic_add"),
    (0x43, 0x33, 0x36, "global_atomic_sub"),
    (0x44, 0x35, 0x38, "global_atomic_smin"),
    (0x45, 0x36, 0x39, "global_atomic_umin"),
    (0x46, 0x37, 0x3a, "global_atomic_smax"),
    (0x47, 0x38, 0x3b, "global_atomic_umax"),
    (0x48, 0x39, 0x3c, "global_atomic_and"),
    (0x49, 0x3a, 0x3d, "global_atomic_or"),
    (0x4a, 0x3b, 0x3e, "global_atomic_xor"),
    (0x4b, 0x3c, 0x3f, "global_atomic_inc"),
    (0x4c, 0x3d, 0x40, "global_atomic_dec"),
    (  -1, 0x3e, 0x50, "global_atomic_fcmpswap"),
    (  -1, 0x3f, 0x51, "global_atomic_fmin"),
    (  -1, 0x40, 0x52, "global_atomic_fmax"),
    (0x60, 0x50, 0x41, "global_atomic_swap_x2"),
    (0x61, 0x51, 0x42, "global_atomic_cmpswap_x2"),
    (0x62, 0x52, 0x43, "global_atomic_add_x2"),
    (0x63, 0x53, 0x44, "global_atomic_sub_x2"),
    (0x64, 0x55, 0x45, "global_atomic_smin_x2"),
    (0x65, 0x56, 0x46, "global_atomic_umin_x2"),
    (0x66, 0x57, 0x47, "global_atomic_smax_x2"),
    (0x67, 0x58, 0x48, "global_atomic_umax_x2"),
    (0x68, 0x59, 0x49, "global_atomic_and_x2"),
    (0x69, 0x5a, 0x4a, "global_atomic_or_x2"),
    (0x6a, 0x5b, 0x4b, "global_atomic_xor_x2"),
    (0x6b, 0x5c, 0x4c, "global_atomic_inc_x2"),
    (0x6c, 0x5d, 0x4d, "global_atomic_dec_x2"),
    (  -1, 0x5e,   -1, "global_atomic_fcmpswap_x2"),
    (  -1, 0x5f,   -1, "global_atomic_fmin_x2"),
    (  -1, 0x60,   -1, "global_atomic_fmax_x2"),
    (  -1, 0x16, 0x28, "global_load_dword_addtid"),  # GFX10.3+
    (  -1, 0x17, 0x29, "global_store_dword_addtid"),  # GFX10.3+
    (  -1, 0x34, 0x37, "global_atomic_csub"),  # GFX10.3+. seems glc must be set
    (  -1,   -1, 0x56, "global_atomic_add_f32"),
)
# Register every GLOBAL row. The rows carry no gfx7 column, so -1 is passed
# in the slot where the FLAT registration passes its gfx7 number. A row is
# flagged atomic when its mnemonic contains "atomic".
for (gfx8, gfx10, gfx11, name) in GLOBAL:
    opcode(
        name, -1, gfx8, gfx10, gfx11,
        Format.GLOBAL, InstrClass.VMem,
        is_atomic="atomic" in name,
    )
# SCRATCH instruction table.
#
# Each row is (gfx8, gfx10, gfx11, name) -- the order in which the
# registration loop that follows unpacks it.
#
# NOTE(review): the rows are bound to an ordered tuple so that registration
# order follows the listing; consumers only need to iterate over it.
SCRATCH = (
    (0x10, 0x08, 0x10, "scratch_load_ubyte"),
    (0x11, 0x09, 0x11, "scratch_load_sbyte"),
    (0x12, 0x0a, 0x12, "scratch_load_ushort"),
    (0x13, 0x0b, 0x13, "scratch_load_sshort"),
    (0x14, 0x0c, 0x14, "scratch_load_dword"),
    (0x15, 0x0d, 0x15, "scratch_load_dwordx2"),
    (0x16, 0x0f, 0x16, "scratch_load_dwordx3"),
    (0x17, 0x0e, 0x17, "scratch_load_dwordx4"),
    (0x18, 0x18, 0x18, "scratch_store_byte"),
    (0x19, 0x19, 0x24, "scratch_store_byte_d16_hi"),
    (0x1a, 0x1a, 0x19, "scratch_store_short"),
    (0x1b, 0x1b, 0x25, "scratch_store_short_d16_hi"),
    (0x1c, 0x1c, 0x1a, "scratch_store_dword"),
    (0x1d, 0x1d, 0x1b, "scratch_store_dwordx2"),
    (0x1e, 0x1f, 0x1c, "scratch_store_dwordx3"),
    (0x1f, 0x1e, 0x1d, "scratch_store_dwordx4"),
    (0x20, 0x20, 0x1e, "scratch_load_ubyte_d16"),
    (0x21, 0x21, 0x21, "scratch_load_ubyte_d16_hi"),
    (0x22, 0x22, 0x1f, "scratch_load_sbyte_d16"),
    (0x23, 0x23, 0x22, "scratch_load_sbyte_d16_hi"),
    (0x24, 0x24, 0x20, "scratch_load_short_d16"),
    (0x25, 0x25, 0x23, "scratch_load_short_d16_hi"),
)
# Register every SCRATCH row. The rows carry no gfx7 column, so -1 is passed
# in that slot; no is_atomic argument is supplied for this format.
for (gfx8, gfx10, gfx11, name) in SCRATCH:
    opcode(
        name, -1, gfx8, gfx10, gfx11,
        Format.SCRATCH, InstrClass.VMem,
    )
# check for duplicate opcode numbers
#
# For each listed generation, every (format, opcode number) pair may be
# claimed by only one instruction name, apart from the explicitly allowed
# pairs below.  A clash is reported on stdout and stops the script.
for ver in ['gfx9', 'gfx10', 'gfx11']:
    # Numbers are compared within one generation only, so the map is rebuilt
    # from scratch for every version.
    op_to_name = {}
    for op in opcodes.values():
        # Pseudo formats are excluded from the check.
        if op.format in [Format.PSEUDO, Format.PSEUDO_BRANCH, Format.PSEUDO_BARRIER, Format.PSEUDO_REDUCTION]:
            continue

        num = getattr(op, 'opcode_' + ver)
        # -1 is a placeholder shared by several rows of the same format in the
        # tables, so it must not be treated as a real number that can clash.
        if num == -1:
            continue

        key = (op.format, num)
        if key not in op_to_name:
            op_to_name[key] = op.name
            continue

        # A second name claims the same (format, number); tolerate the known
        # pairs, keeping the first recorded name in the map.
        names = {op_to_name[key], op.name}
        if ver in ['gfx8', 'gfx9', 'gfx11'] and names == {'v_mul_lo_i32', 'v_mul_lo_u32'}:
            continue
        # v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
        if ver == 'gfx10' and names == {'v_mad_legacy_f32', 'v_fma_legacy_f32'}:
            continue
        # v_mac_legacy_f32 is replaced with v_fmac_legacy_f32 on GFX10.3
        if ver == 'gfx10' and names == {'v_mac_legacy_f32', 'v_fmac_legacy_f32'}:
            continue

        print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))
        # Stop with a non-zero status so the clash cannot go unnoticed.
        # NOTE(review): exit status 1 is assumed -- confirm against the
        # project's build expectations.
        raise SystemExit(1)