1 # Copyright (c) 2013 The Native Client Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 ################################################################################
7 # three columns separated by commas. Each line describes one instruction.
8 # Notation for argument types and sizes and for opcodes is based on
9 # AMD64 Architecture Programmer's Manual.
10 ################################################################################
11 # First column: instruction description.
12 # Includes name of the instruction and arguments.
14 # Arguments consist of four parts:
15 # 1. Read/write attribute (optional).
18 # 4. Implicit argument mark (optional).
20 # Read/write attribute:
21 # ': Instruction does not use this argument (lea or nop).
22 # =: Instruction reads from this argument.
23 # !: Instruction writes to this argument.
24 # &: Instruction reads this argument and writes the result to it.
25 # By default one- and two-operand instructions are assumed to read all
26 # operands and store result to the last one, while instructions with
27 # three or more operands are assumed to read all operands except last one
28 # which is used to store the result of the execution.
29 # Possible argument types:
30 # a: Accumulator: %al/%ax/%eax/%rax/%xmm0 (depending on size).
31 # c: Counter register: %cl/%cx/%ecx/%rcx (depending on size).
32 # d: Data register: %dl/%dx/%edx/%rdx (depending on size).
33 # f: x87 register in opcode (3 least significant bits).
34 # i: Second immediate value encoded in the instruction.
35 # o: I/O port in %dx (used in "in"/"out" instructions).
36 # r: Register in opcode (3 least significant bits plus rex.B).
37 # t: Top of the x87 stack (%st).
38 # x: A memory operand addressed by the %ds:(%[er]bx). See "xlat".
39 # B: General purpose register specified by the VEX/XOP.vvvv field.
40 # C: Control register specified by the ModRM.reg field.
41 # D: Debug register specified by the ModRM.reg field.
42 # E: General purpose register or memory operand specified by the r/m
43 # field of the ModRM byte. For memory operands, the ModRM byte may
44 # be followed by a SIB byte to specify one of the indexed
45 # register-indirect addressing forms.
46 # G: General purpose register specified by the reg field of ModRM.
47 # H: YMM or XMM register specified by the VEX/XOP.vvvv field.
48 # I: Immediate value encoded in the instruction.
49 # J: The instruction encoding includes a relative offset that is added to
51 # L: YMM or XMM register specified using the most-significant 4 bits of
52 # the last byte of the instruction. In legacy or compatibility mode
53 # the most significant bit is ignored.
54 # M: A memory operand specified by the {mod, r/m} field of the ModRM byte.
56 # N: 64-bit MMX register specified by the ModRM.r/m field. The ModRM.mod
58 # O: The offset of an operand is encoded in the instruction. There is no
59 # ModRM byte in the instruction encoding. Indexed register-indirect
60 # addressing using the SIB byte is not supported.
61 # P: 64-bit MMX register specified by the ModRM.reg field.
62 # Q: 64-bit MMX-register or memory operand specified by the {mod, r/m}
63 # field of the ModRM byte. For memory operands, the ModRM byte may
64 # be followed by a SIB byte to specify one of the indexed
65 # register-indirect addressing forms.
66 # R: General purpose register specified by the ModRM.r/m field.
67 # The ModRM.mod field must be 11b.
68 # S: Segment register specified by the ModRM.reg field.
69 # U: YMM/XMM register specified by the ModRM.r/m field.
70 # The ModRM.mod field must be 11b.
71 # V: YMM/XMM register specified by the ModRM.reg field.
72 # W: YMM/XMM register or memory operand specified by the {mod, r/m} field
73 # of the ModRM byte. For memory operands, the ModRM byte may be
74 # followed by a SIB byte to specify one of the indexed
75 # register-indirect addressing forms.
76 # X: A memory operand addressed by the %ds:%[er]si registers. Used in
77 # string instructions.
78 # Y: A memory operand addressed by the %es:%[er]di registers. Used in
79 # string instructions.
82 # A byte, word, doubleword, or quadword (in 64-bit mode),
83 # depending on the effective operand size.
84 # 2: Two bits (see VPERMIL2Px instruction).
85 # 7: x87 register %st(N).
86 # b: A byte, irrespective of the effective operand size.
87 # d: A doubleword (32-bit), irrespective of the effective operand size.
88 # do: A double octword (256 bits), irrespective of the effective operand
90 # dq: A double quadword (128 bits), irrespective of the effective
92 # fq: A quadra quadword (256 bits), irrespective of the effective
94 # o: An octword (128 bits), irrespective of the effective operand size.
95 # p: A 32-bit or 48-bit far pointer, depending on the effective operand
97 # pb: A Vector with byte-wide (8-bit) elements (packed byte).
98 # pd: A double-precision (64-bit) floating-point vector operand (packed
100 # pdw: Vector composed of 32-bit doublewords.
101 # pdwx: Vector composed of 32-bit doublewords. L bit selects 256bit YMM
103 # pdx: A double-precision (64-bit) floating-point vector operand (packed
104 # double-precision). L bit selects 256bit YMM registers.
105 # ph: A half-precision (16-bit) floating-point vector operand (packed
107 # phx: A half-precision (16-bit) floating-point vector operand (packed
108 # half-precision). L bit selects 256bit YMM registers.
109 # pi: Vector composed of 16-bit integers (packed integer).
110 # pj: Vector composed of 32-bit integers (packed double integer).
111 # pjx: Vector composed of 32-bit integers (packed double integer).
112 # L bit selects 256bit YMM registers.
113 # pk: Vector composed of 8-bit integers (packed half-word integer).
114 # pkx: Vector composed of 8-bit integers (packed half-word integer).
115 # L bit selects 256bit YMM registers.
116 # pq: Vector composed of 64-bit integers (packed quadword integer).
117 # pqw: Vector composed of 64-bit quadwords (packed quadword).
118 # pqwx: Vector composed of 64-bit quadwords (packed quadword). L bit
119 # selects 256bit YMM registers.
120 # pqx: Vector composed of 64-bit integers (packed quadword integer).
121 # L bit selects 256bit YMM registers.
122 # ps: A single-precision floating-point vector operand (packed
124 # psx: A single-precision floating-point vector operand (packed
125 # single-precision). L bit selects 256bit YMM registers.
126 # pw: Vector composed of 16-bit words (packed word).
127 # q: A quadword (64-bit), irrespective of the effective operand size.
128 # r: Register size (32bit in 32bit mode, 64bit in 64bit mode).
129 # s: Segment register (if register operand).
130 # s: A 6-byte or 10-byte pseudo-descriptor (if memory operand).
131 # sb: A scalar 10-byte packed BCD value (scalar BCD).
132 # sd: A scalar double-precision floating-point operand (scalar double).
133 # se: A 14-byte or 28-byte x87 environment.
134 # si: A scalar doubleword (32-bit) integer operand (scalar integer).
135 # sq: A scalar quadword (64-bit) integer operand (scalar integer).
136 # sr: A 94-byte or 108-byte x87 state.
137 # ss: A scalar single-precision floating-point operand (scalar single).
138 # st: A scalar 80bit-precision floating-point operand (scalar tenbytes).
139 # sw: A scalar word (16-bit) integer operand (scalar integer).
140 # sx: A 512-byte extended x87/MMX/XMM state.
141 # v: A word, doubleword, or quadword (in 64-bit mode), depending on
142 # the effective operand size.
143 # w: A word, irrespective of the effective operand size.
144 # x: Instruction supports both vector sizes (128 bits or 256 bits).
145 # Size is encoded using the VEX/XOP.L field. (L=0: 128 bits;
146 # L=1: 256 bits). Usually this symbol is appended to ps or pd, but
147 # sometimes it is used alone. For gen_dfa psx, pdx and x
149 # y: A doubleword or quadword depending on effective operand size.
150 # z: A word if the effective operand size is 16 bits, or a doubleword
151 # if the effective operand size is 32 or 64 bits.
152 # Implicit argument mark:
153 # *: This argument is implicit. It's not shown in the disassembly listing.
154 ################################################################################
155 # Second column: instruction opcodes.
156 # Includes all opcode bytes. If the first opcode byte is 0x66/data16,
157 # 0xf2/repnz, or 0xf3/rep/repz then they can be moved before other prefixes
158 # (and will be moved before REX prefix if it's allowed). Note: data16, repnz,
159 # and rep/repz opcodes will set appropriate flags while 0x66, 0xf2, and 0xf3
161 # If part of the opcode is stored in ModRM byte then opcode should include the
162 # usual "/0", "/1", ..., "/7" "bytes".
163 # For VEX/XOP instructions it is expected that first three opcode bytes are
164 # specified in the following form:
167 # <W>.<vvvv>.<L>.<pp>
168 # (so they describe long form of VEX prefix; short form is deduced
169 # automatically when appropriate)
170 ################################################################################
171 # Third column: additional instruction notes.
172 # Different kinds of notes for the instruction: non-typical prefixes (for
173 # example "lock" prefix or "rep" prefix), CPUID checks, etc.
176 # branch_hint: branch hint prefixes are allowed (0x2E, 0x3E)
177 # condrep: prefixes "repnz" and "repz" are allowed for the instruction
178 # lock: prefix "lock" is allowed for the instruction
179 # rep: prefix "rep" is allowed for the instruction (it's alias of "repz")
180 # no_memory_access: command does not access memory in a detectable way: lea,
181 # nop, prefetch* instructions...
182 # norex: "rex" prefix can not be used with this instruction (various "nop"
183 # instructions use this flag)
184 # norexw: "rex.W" cannot be used with this instruction (usually used when
185 # the instruction with "rex.W" has a different name: e.g. "movd"/"movq")
187 # Instruction enabling/disabling:
188 # ia32: ia32-only instruction
189 # amd64: amd64-only instruction
190 # nacl-forbidden: instruction is not supported in NaCl sandbox
191 # nacl-ia32-forbidden: instruction is not supported in ia32 NaCl sandbox
192 # nacl-amd64-forbidden: instruction is not supported in amd64 NaCl sandbox
193 # disabled_untested: instruction is disabled because it is not tested yet.
196 # nacl-amd64-zero-extends: instruction can be used to zero-extend register
198 # nacl-amd64-modifiable: instruction can be modified in amd64 mode
199 # att-show-name-suffix-{b,l,ll,t,s,q,x,y,w}: instruction is shown with the
200 # given suffix by objdump in AT&T mode
202 # CPU features are defined in validator_internal.h.
203 ################################################################################
206 # Technically, columns are separated with mere ',' followed by spaces for
207 # readability, but there are quoted instruction names that include commas
208 # not followed by spaces (see nops.def).
209 # For simplicity I chose to rely on this coincidence and use a split-based
210 # parser instead of a proper recursive-descent one.
211 # If somebody accidentally puts ', ' in a quoted instruction name, parsing will
212 # fail loudly, because the closing quote will then fall into the second or
213 # third column and cause a parse error.
214 # TODO(shcherbina): use for column separator something that is never encountered
215 # in columns, like semicolon?
# Column delimiter: a comma followed by exactly one space. A bare ',' (as found
# inside quoted instruction names) does not match it.
216 COLUMN_SEPARATOR = ', '
# Names of the instruction notes (third column of a definition line) known to
# this module: CPU feature requirements, architecture/NaCl enabling flags and
# objdump AT&T name-suffix hints.
# NOTE(review): presumably the parser rejects any note not listed here; the
# code that consumes this list should be checked to confirm.
219 SUPPORTED_ATTRIBUTES = [
220 # Parsing attributes.
231 'CPUFeature_3DPRFTCH',
234 'CPUFeature_ALTMOVCR8',
237 'CPUFeature_CLFLUSH',
239 'CPUFeature_CLMULAVX',
241 'CPUFeature_CMOVx87',
246 'CPUFeature_EMMXSSE',
270 'CPUFeature_SYSCALL',
278 # Attributes for enabling/disabling based on architecture and validity.
281 'nacl-ia32-forbidden',
282 'nacl-amd64-forbidden',
284 'nacl-amd64-zero-extends',
285 'nacl-amd64-modifiable',
288 # AT&T Decoder attributes.
289 'att-show-name-suffix-b',
290 'att-show-name-suffix-l',
291 'att-show-name-suffix-ll',
292 'att-show-name-suffix-t',
293 'att-show-name-suffix-s',
294 'att-show-name-suffix-q',
295 'att-show-name-suffix-x',
296 'att-show-name-suffix-y',
297 'att-show-name-suffix-w',
# Operand read/write modes. Presumably these mirror the "Read/write attribute"
# markers (', =, !, &) described in the header comment -- verify against the
# class body.
301 class OperandReadWriteMode(object):
# One-letter operand type codes. Each value is the letter used for that operand
# type in the "Possible argument types" legend of the header comment.
# ALL_OPERAND_TYPES collects every attribute of this class whose name does not
# start with '__', so any constant added here becomes a known operand type.
308 class OperandType(object):
314 SECOND_IMMEDIATE = 'i'  # second immediate value encoded in the instruction
316 CONTROL_REGISTER = 'C' # in ModRM.reg
317 DEBUG_REGISTER = 'D' # in ModRM.reg
319 REGISTER_IN_OPCODE = 'r'  # 3 least significant opcode bits plus rex.B
320 X87_REGISTER_IN_OPCODE = 'f'  # 3 least significant opcode bits
322 X87_ST = 't' # st0 that objdump displays as 'st'
326 RELATIVE_TARGET = 'J'  # relative offset encoded in the instruction
329 REGISTER_IN_REG = 'G'  # general purpose register in ModRM.reg
330 REGISTER_OR_MEMORY = 'E' # in ModRM.mod and .r/m
331 MEMORY = 'M' # in ModRM.mod and .r/m
332 SEGMENT_REGISTER_IN_REG = 'S'  # segment register in ModRM.reg
334 MMX_REGISTER_IN_RM = 'N'  # ModRM.r/m
335 MMX_REGISTER_IN_REG = 'P'  # ModRM.reg
336 MMX_REGISTER_OR_MEMORY = 'Q' # in ModRM.mod and .r/m
338 XMM_REGISTER_IN_RM = 'U'  # YMM/XMM register in ModRM.r/m
339 XMM_REGISTER_IN_REG = 'V'  # YMM/XMM register in ModRM.reg
340 XMM_REGISTER_OR_MEMORY = 'W' # in ModRM.mod and .r/m
342 XMM_REGISTER_IN_LAST_BYTE = 'L' # most-significant 4 bits
348 REGISTER_IN_VVVV = 'B'  # general purpose register in VEX/XOP.vvvv
349 XMM_REGISTER_IN_VVVV = 'H'  # YMM or XMM register in VEX/XOP.vvvv
# Set of all operand type codes: the value of every OperandType class attribute
# whose name does not start with '__' (this filters out the entries Python adds
# to a class __dict__ automatically, such as __module__ and __doc__).
354 ALL_OPERAND_TYPES = set(
355 v for k, v in OperandType.__dict__.items() if not k.startswith('__'))