1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
6 // This file was previously known as instrs.h
8 /*****************************************************************************
9 * x86 instructions for the JIT compiler
11 * id -- the enum name for the instruction
12 * nm -- textual name (for assembly display)
13 * fp -- 1 = floating point instruction, 0 = not floating point instruction
14 * um -- update mode, see IUM_xx enum (rd, wr, or rw)
15 * rf -- 1 = reads flags, 0 = doesn't read flags
16 * wf -- 1 = writes flags, 0 = doesn't write flags
17 * mr -- base encoding for R/M[reg] addressing mode
18 * mi -- base encoding for R/M,icon addressing mode
19 * rm -- base encoding for reg,R/M addressing mode
20 * a4 -- base encoding for eax,i32 addressing mode
21 * rr -- base encoding for register addressing mode
23 ******************************************************************************/
26 #if !defined(_TARGET_XARCH_)
27 #error Unexpected target type
31 #error At least INST1 must be defined before including this file.
33 /*****************************************************************************/
// Default no-op definitions: any INSTn flavor the including file did not define
// expands to nothing, so an includer may process only the arities it cares about.
// NOTE(review): presumably each #define is wrapped in an #ifndef guard that is not
// visible in this chunk — confirm against the full file.
35 #define INST0(id, nm, fp, um, rf, wf, mr )
38 #define INST2(id, nm, fp, um, rf, wf, mr, mi )
41 #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
44 #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 )
47 #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr)
50 /*****************************************************************************/
51 /* The following is x86-specific */
52 /*****************************************************************************/
54 // enum name FP updmode rf wf R/M[reg] R/M,icon reg,R/M eax,i32 register
55 INST5(invalid, "INVALID" , 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
57 INST5(push , "push" , 0, IUM_RD, 0, 0, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050)
58 INST5(pop , "pop" , 0, IUM_WR, 0, 0, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058)
59 // push_hide/pop_hide: identical encodings to push/pop above, but they do not
// affect the emitter's stack level tracking.
60 INST5(push_hide, "push" , 0, IUM_RD, 0, 0, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050)
61 INST5(pop_hide, "pop" , 0, IUM_WR, 0, 0, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058)
63 INST5(inc , "inc" , 0, IUM_RW, 0, 1, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040)
64 INST5(inc_l , "inc" , 0, IUM_RW, 0, 1, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE) // register form via modrm (0xC0) rather than short 0x40+reg
65 INST5(dec , "dec" , 0, IUM_RW, 0, 1, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048)
66 INST5(dec_l , "dec" , 0, IUM_RW, 0, 1, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE) // register form via modrm (0xC8) rather than short 0x48+reg
68 // enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M eax,i32
70 INST4(add , "add" , 0, IUM_RW, 0, 1, 0x000000, 0x000080, 0x000002, 0x000004)
71 INST4(or , "or" , 0, IUM_RW, 0, 1, 0x000008, 0x000880, 0x00000A, 0x00000C)
72 INST4(adc , "adc" , 0, IUM_RW, 1, 1, 0x000010, 0x001080, 0x000012, 0x000014)
73 INST4(sbb , "sbb" , 0, IUM_RW, 1, 1, 0x000018, 0x001880, 0x00001A, 0x00001C)
74 INST4(and , "and" , 0, IUM_RW, 0, 1, 0x000020, 0x002080, 0x000022, 0x000024)
75 INST4(sub , "sub" , 0, IUM_RW, 0, 1, 0x000028, 0x002880, 0x00002A, 0x00002C)
76 INST4(xor , "xor" , 0, IUM_RW, 0, 1, 0x000030, 0x003080, 0x000032, 0x000034)
77 INST4(cmp , "cmp" , 0, IUM_RD, 0, 1, 0x000038, 0x003880, 0x00003A, 0x00003C)
78 INST4(test , "test" , 0, IUM_RD, 0, 1, 0x000084, 0x0000F6, 0x000084, 0x0000A8) // mr and rm encodings are identical (0x84): test is symmetric
79 INST4(mov , "mov" , 0, IUM_WR, 0, 0, 0x000088, 0x0000C6, 0x00008A, 0x0000B0)
81 INST4(lea , "lea" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE)
83 // enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M
85 // Note that emitter has only partial support for BT. It can only emit the reg,reg form
86 // and the registers need to be reversed to get the correct encoding.
87 INST3(bt , "bt" , 0, IUM_RD, 0, 1, 0x0F00A3, BAD_CODE, 0x0F00A3)
89 INST3(movsx , "movsx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00BE)
91 INST3(movsxd , "movsxd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x4800000063LL ) // 0x48 = REX.W prefix folded into the encoding
93 INST3(movzx , "movzx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00B6)
95 INST3(cmovo , "cmovo" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0040)
96 INST3(cmovno , "cmovno" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0041)
97 INST3(cmovb , "cmovb" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0042)
98 INST3(cmovae , "cmovae" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0043)
99 INST3(cmove , "cmove" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0044)
100 INST3(cmovne , "cmovne" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0045)
101 INST3(cmovbe , "cmovbe" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0046)
102 INST3(cmova , "cmova" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0047)
103 INST3(cmovs , "cmovs" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0048)
104 INST3(cmovns , "cmovns" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0049)
105 INST3(cmovpe , "cmovpe" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004A)
106 INST3(cmovpo , "cmovpo" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004B)
107 INST3(cmovl , "cmovl" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004C)
108 INST3(cmovge , "cmovge" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004D)
109 INST3(cmovle , "cmovle" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004E)
110 INST3(cmovg , "cmovg" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004F)
112 INST3(xchg , "xchg" , 0, IUM_RW, 0, 0, 0x000086, BAD_CODE, 0x000086)
// NOTE(review): imul's MR code 0x0F00AC is the opcode of shrd (0F AC), not imul
// (0F AF). Presumably the MR form is never emitted for imul — confirm.
113 INST3(imul , "imul" , 0, IUM_RW, 0, 1, 0x0F00AC, BAD_CODE, 0x0F00AF) // op1 *= op2
115 // enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M
117 // Instead of encoding these as 3-operand instructions, we encode them
118 // as 2-operand instructions with the target register being implicit
119 // implicit_reg = op1*op2_icon
// NOTE(review): the R/M,icon codes below are based on 0x68 (not imul's 0x69/0x6B
// opcodes); presumably the emitter adjusts the opcode byte — confirm.
120 #define INSTMUL INST3
121 INSTMUL(imul_AX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x000068, BAD_CODE)
122 INSTMUL(imul_CX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x000868, BAD_CODE)
123 INSTMUL(imul_DX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x001068, BAD_CODE)
124 INSTMUL(imul_BX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x001868, BAD_CODE)
125 INSTMUL(imul_SP, "imul", 0, IUM_RD, 0, 1, BAD_CODE, BAD_CODE, BAD_CODE) // no valid encoding: every form is BAD_CODE
126 INSTMUL(imul_BP, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x002868, BAD_CODE)
127 INSTMUL(imul_SI, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x003068, BAD_CODE)
128 INSTMUL(imul_DI, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x003868, BAD_CODE)
130 #ifdef _TARGET_AMD64_
// 0x44 prefix byte = REX.R, selecting r8-r15 as the implicit target register.
132 INSTMUL(imul_08, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400000068, BAD_CODE)
133 INSTMUL(imul_09, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400000868, BAD_CODE)
134 INSTMUL(imul_10, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400001068, BAD_CODE)
135 INSTMUL(imul_11, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400001868, BAD_CODE)
136 INSTMUL(imul_12, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400002068, BAD_CODE)
137 INSTMUL(imul_13, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400002868, BAD_CODE)
138 INSTMUL(imul_14, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003068, BAD_CODE)
139 INSTMUL(imul_15, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003868, BAD_CODE)
141 #endif // _TARGET_AMD64_
143 // the hex codes in this file represent the instruction encoding as follows:
144 // 0x0000ff00 - modrm byte position
145 // 0x000000ff - last byte of opcode (before modrm)
146 // 0x00ff0000 - first byte of opcode
147 // 0xff000000 - middle byte of opcode, if needed (after first, before last)
149 // So a 1-byte opcode is: and with modrm:
150 // 0x00000011 0x0000RM11
152 // So a 2-byte opcode is: and with modrm:
153 // 0x00002211 0x0011RM22
155 // So a 3-byte opcode is: and with modrm:
156 // 0x00113322 0x2211RM33
158 // So a 4-byte opcode would be something like this:
// Helpers that pack multi-byte opcodes into the encoding layout described above.
161 #define PACK3(byte1,byte2,byte3) ((byte1 << 16) | (byte2 << 24) | byte3)
162 #define PACK2(byte1,byte2) ((byte1 << 16) | byte2)
163 #define SSEFLT(c) PACK3(0xf3, 0x0f, c) // F3 0F xx (scalar-single prefix)
164 #define SSEDBL(c) PACK3(0xf2, 0x0f, c) // F2 0F xx (scalar-double prefix)
165 #define PCKDBL(c) PACK3(0x66, 0x0f, c) // 66 0F xx (packed-double / integer prefix)
166 #define PCKFLT(c) PACK2(0x0f,c) // 0F xx (no prefix, packed single)
168 // These macros encode an extra opcode byte (0x38 or 0x3A) that is implied by the macro.
169 #define PACK4(byte1,byte2,byte3,byte4) ((byte1 << 16) | (byte2 << 24) | byte3 | (byte4 << 8))
170 #define SSE38(c) PACK4(0x66, 0x0f, 0x38, c)
171 #define SSE3A(c) PACK4(0x66, 0x0f, 0x3A, c)
173 // VEX* encodes the implied leading opcode bytes in c1:
174 // 1: implied 0f, 2: implied 0f 38, 3: implied 0f 3a
// NOTE(review): VEX3INT and VEX3FLT expand identically — confirm the duplication
// is intentional (two names kept for readability at use sites).
175 #define VEX2INT(c1,c2) PACK3(c1, 0xc5, c2)
176 #define VEX3INT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
177 #define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
179 // Please insert any SSE2 instructions between FIRST_SSE2_INSTRUCTION and LAST_SSE2_INSTRUCTION
180 INST3(FIRST_SSE2_INSTRUCTION, "FIRST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
182 // These are the SSE instructions used on x86
183 INST3( mov_i2xmm, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6E)) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg
184 INST3( mov_xmm2i, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7E)) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg
185 INST3( pmovmskb, "pmovmskb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD7)) // Move the MSB bits of all bytes in a xmm reg to an int reg
186 INST3( movmskpd, "movmskpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x50)) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros.
187 INST3( movd, "movd" , 0, IUM_WR, 0, 0, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E))
188 INST3( movq, "movq" , 0, IUM_WR, 0, 0, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E))
189 INST3( movsdsse2, "movsd" , 0, IUM_WR, 0, 0, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10)) // "sse2" suffix presumably avoids a name clash with the string instruction movsd — confirm
191 INST3( punpckldq, "punpckldq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x62))
193 INST3( xorps, "xorps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x57)) // XOR packed singles
195 INST3( cvttsd2si, "cvttsd2si" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2C)) // cvt with trunc scalar double to signed DWORDs
197 #ifndef LEGACY_BACKEND
// Non-temporal stores (mr column = store form; no load or immediate forms).
198 INST3( movntdq, "movntdq" , 0, IUM_WR, 0, 0, PCKDBL(0xE7), BAD_CODE, BAD_CODE)
199 INST3( movnti, "movnti" , 0, IUM_WR, 0, 0, PCKFLT(0xC3), BAD_CODE, BAD_CODE)
200 INST3( movntpd, "movntpd" , 0, IUM_WR, 0, 0, PCKDBL(0x2B), BAD_CODE, BAD_CODE)
201 INST3( movntps, "movntps" , 0, IUM_WR, 0, 0, PCKFLT(0x2B), BAD_CODE, BAD_CODE)
202 INST3( movdqu, "movdqu" , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F))
203 INST3( movdqa, "movdqa" , 0, IUM_WR, 0, 0, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F))
204 INST3( movlpd, "movlpd" , 0, IUM_WR, 0, 0, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12))
205 INST3( movlps, "movlps" , 0, IUM_WR, 0, 0, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12))
206 INST3( movhpd, "movhpd" , 0, IUM_WR, 0, 0, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16))
207 INST3( movhps, "movhps" , 0, IUM_WR, 0, 0, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16))
208 INST3( movss, "movss" , 0, IUM_WR, 0, 0, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10))
209 INST3( movapd, "movapd" , 0, IUM_WR, 0, 0, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28))
210 INST3( movaps, "movaps" , 0, IUM_WR, 0, 0, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28))
211 INST3( movupd, "movupd" , 0, IUM_WR, 0, 0, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10))
212 INST3( movups, "movups" , 0, IUM_WR, 0, 0, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10))
213 INST3( movhlps, "movhlps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x12))
214 INST3( movlhps, "movlhps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x16))
215 INST3( movmskps, "movmskps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x50))
216 INST3( unpckhps, "unpckhps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x15))
217 INST3( unpcklps, "unpcklps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x14))
218 INST3( maskmovdqu, "maskmovdqu" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF7))
220 INST3( shufps, "shufps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC6))
221 INST3( shufpd, "shufpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC6))
223 INST3( punpckhdq, "punpckhdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6A))
// Fences and prefetch hints: opcode 0F AE / 0F 18 with the sub-operation
// selected by the modrm byte baked into the code (e.g. 0xE8/0xF0/0xF8 below).
225 INST3( lfence, "lfence" , 0, IUM_RD, 0, 0, 0x000FE8AE, BAD_CODE, BAD_CODE)
226 INST3( mfence, "mfence" , 0, IUM_RD, 0, 0, 0x000FF0AE, BAD_CODE, BAD_CODE)
227 INST3( prefetchnta, "prefetchnta" , 0, IUM_RD, 0, 0, 0x000F0018, BAD_CODE, BAD_CODE)
228 INST3( prefetcht0, "prefetcht0" , 0, IUM_RD, 0, 0, 0x000F0818, BAD_CODE, BAD_CODE)
229 INST3( prefetcht1, "prefetcht1" , 0, IUM_RD, 0, 0, 0x000F1018, BAD_CODE, BAD_CODE)
230 INST3( prefetcht2, "prefetcht2" , 0, IUM_RD, 0, 0, 0x000F1818, BAD_CODE, BAD_CODE)
231 INST3( sfence, "sfence" , 0, IUM_RD, 0, 0, 0x000FF8AE, BAD_CODE, BAD_CODE)
// SSE/SSE2 packed and scalar floating-point arithmetic.
234 INST3( addps, "addps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x58)) // Add packed singles
235 INST3( addss, "addss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x58)) // Add scalar singles
236 INST3( addpd, "addpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x58)) // Add packed doubles
237 INST3( addsd, "addsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x58)) // Add scalar doubles
238 INST3( mulps, "mulps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x59)) // Multiply packed singles
239 INST3( mulss, "mulss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x59)) // Multiply scalar single
240 INST3( mulpd, "mulpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x59)) // Multiply packed doubles
241 INST3( mulsd, "mulsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x59)) // Multiply scalar doubles
242 INST3( subps, "subps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5C)) // Subtract packed singles
243 INST3( subss, "subss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5C)) // Subtract scalar singles
244 INST3( subpd, "subpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5C)) // Subtract packed doubles
245 INST3( subsd, "subsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5C)) // Subtract scalar doubles
246 INST3( minps, "minps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5D)) // Return Minimum packed singles
247 INST3( minss, "minss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5D)) // Return Minimum scalar single
248 INST3( minpd, "minpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5D)) // Return Minimum packed doubles
249 INST3( minsd, "minsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5D)) // Return Minimum scalar double
250 INST3( divps, "divps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5E)) // Divide packed singles
251 INST3( divss, "divss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5E)) // Divide scalar singles
252 INST3( divpd, "divpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5E)) // Divide packed doubles
253 INST3( divsd, "divsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5E)) // Divide scalar doubles
254 INST3( maxps, "maxps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5F)) // Return Maximum packed singles
255 INST3( maxss, "maxss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5F)) // Return Maximum scalar single
256 INST3( maxpd, "maxpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5F)) // Return Maximum packed doubles
257 INST3( maxsd, "maxsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5F)) // Return Maximum scalar double
258 INST3( xorpd, "xorpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x57)) // XOR packed doubles
259 INST3( andps, "andps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x54)) // AND packed singles
260 INST3( andpd, "andpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x54)) // AND packed doubles
261 INST3( sqrtps, "sqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x51)) // Sqrt of packed singles
262 INST3( sqrtss, "sqrtss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x51)) // Sqrt of scalar single
263 INST3( sqrtpd, "sqrtpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x51)) // Sqrt of packed doubles
264 INST3( sqrtsd, "sqrtsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x51)) // Sqrt of scalar double
265 INST3( andnps, "andnps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x55)) // And-Not packed singles
266 INST3( andnpd, "andnpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x55)) // And-Not packed doubles
267 INST3( orps, "orps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x56)) // Or packed singles
268 INST3( orpd, "orpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x56)) // Or packed doubles
269 INST3( haddpd, "haddpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7C)) // Horizontal add packed doubles
270 INST3( haddps, "haddps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x7C)) // Horizontal add packed floats
271 INST3( hsubpd, "hsubpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7D)) // Horizontal subtract packed doubles
272 INST3( hsubps, "hsubps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x7D)) // Horizontal subtract packed floats
273 INST3( addsubps, "addsubps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xD0)) // Add/Subtract packed singles
274 INST3( addsubpd, "addsubpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD0)) // Add/Subtract packed doubles
276 // SSE 2 approx arith
277 INST3( rcpps, "rcpps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x53)) // Reciprocal of packed singles
278 INST3( rcpss, "rcpss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x53)) // Reciprocal of scalar single
279 INST3( rsqrtps, "rsqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x52)) // Reciprocal Sqrt of packed singles
280 INST3( rsqrtss, "rsqrtss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x52)) // Reciprocal Sqrt of scalar single
// Conversions between integer and floating-point forms.
283 INST3( cvtpi2ps, "cvtpi2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2A)) // cvt packed DWORDs to singles
284 INST3( cvtsi2ss, "cvtsi2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2A)) // cvt DWORD to scalar single
285 INST3( cvtpi2pd, "cvtpi2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2A)) // cvt packed DWORDs to doubles
286 INST3( cvtsi2sd, "cvtsi2sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2A)) // cvt DWORD to scalar double
287 INST3( cvttps2pi, "cvttps2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2C)) // cvt with trunc packed singles to DWORDs
288 INST3( cvttss2si, "cvttss2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2C)) // cvt with trunc scalar single to DWORD
289 INST3( cvttpd2pi, "cvttpd2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2C)) // cvt with trunc packed doubles to DWORDs
290 INST3( cvtps2pi, "cvtps2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2D)) // cvt packed singles to DWORDs
291 INST3( cvtss2si, "cvtss2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2D)) // cvt scalar single to DWORD
292 INST3( cvtpd2pi, "cvtpd2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2D)) // cvt packed doubles to DWORDs
293 INST3( cvtsd2si, "cvtsd2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2D)) // cvt scalar double to DWORD
294 INST3( cvtps2pd, "cvtps2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5A)) // cvt packed singles to doubles
295 INST3( cvtpd2ps, "cvtpd2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5A)) // cvt packed doubles to singles
296 INST3( cvtss2sd, "cvtss2sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5A)) // cvt scalar single to scalar doubles
297 INST3( cvtsd2ss, "cvtsd2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5A)) // cvt scalar double to scalar singles
298 INST3( cvtdq2ps, "cvtdq2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5B)) // cvt packed DWORDs to singles
299 INST3( cvtps2dq, "cvtps2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5B)) // cvt packed singles to DWORDs
300 INST3( cvttps2dq, "cvttps2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5B)) // cvt with trunc packed singles to DWORDs
301 INST3( cvtpd2dq, "cvtpd2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xE6)) // cvt packed doubles to DWORDs
302 INST3( cvttpd2dq, "cvttpd2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE6)) // cvt with trunc packed doubles to DWORDs
303 INST3( cvtdq2pd, "cvtdq2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xE6)) // cvt packed DWORDs to doubles
305 // SSE2 comparison instructions
306 INST3( comiss, "comiss", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2F)) // ordered compare singles
307 INST3( comisd, "comisd", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2F)) // ordered compare doubles
308 INST3( ucomiss, "ucomiss", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2E)) // unordered compare singles
309 INST3( ucomisd, "ucomisd", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2E)) // unordered compare doubles
311 // SSE2 packed single/double comparison operations.
312 // Note that these instructions not only compare but also overwrite the first source.
313 INST3( cmpps, "cmpps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC2)) // compare packed singles
314 INST3( cmppd, "cmppd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC2)) // compare packed doubles
315 INST3( cmpss, "cmpss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xC2)) // compare scalar singles
316 INST3( cmpsd, "cmpsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xC2)) // compare scalar doubles
318 // SSE2 packed integer operations
319 INST3( paddb, "paddb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFC)) // Add packed byte integers
320 INST3( paddw, "paddw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFD)) // Add packed word (16-bit) integers
321 INST3( paddd, "paddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFE)) // Add packed double-word (32-bit) integers
322 INST3( paddq, "paddq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD4)) // Add packed quad-word (64-bit) integers
323 INST3( paddsb, "paddsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEC)) // Add packed signed byte integers and saturate the results
324 INST3( paddsw, "paddsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xED)) // Add packed signed word integers and saturate the results
325 INST3( paddusb, "paddusb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDC)) // Add packed unsigned byte integers and saturate the results
326 INST3( paddusw, "paddusw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDD)) // Add packed unsigned word integers and saturate the results
327 INST3( pavgb, "pavgb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE0)) // Average of packed byte integers
328 INST3( pavgw, "pavgw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE3)) // Average of packed word integers
329 INST3( psubb, "psubb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF8)) // Subtract packed byte (8-bit) integers
330 INST3( psubw, "psubw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF9)) // Subtract packed word (16-bit) integers
331 INST3( psubd, "psubd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFA)) // Subtract packed double-word (32-bit) integers
332 INST3( psubq, "psubq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFB)) // subtract packed quad-word (64-bit) integers
333 INST3( pmaddwd, "pmaddwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF5)) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst
334 INST3( pmulhw, "pmulhw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE5)) // Multiply high the packed 16-bit signed integers
335 INST3( pmulhuw, "pmulhuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE4)) // Multiply high the packed 16-bit unsigned integers
336 INST3( pmuludq, "pmuludq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF4)) // packed multiply 32-bit unsigned integers and store 64-bit result
337 INST3( pmullw, "pmullw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD5)) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result
338 INST3( pand, "pand" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDB)) // Packed bit-wise AND of two xmm regs
339 INST3( pandn, "pandn" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDF)) // Packed bit-wise AND NOT of two xmm regs
340 INST3( por, "por" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEB)) // Packed bit-wise OR of two xmm regs
341 INST3( pxor, "pxor" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEF)) // Packed bit-wise XOR of two xmm regs
342 INST3( psadbw, "psadbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF6)) // Compute the sum of absolute differences of packed unsigned 8-bit integers
343 INST3( psubsb, "psubsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE8)) // Subtract packed 8-bit integers in b from packed 8-bit integers in a using saturation
344 INST3( psubusb, "psubusb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD8)) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation
345 INST3( psubsw, "psubsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE9)) // Subtract packed 16-bit integers in b from packed 16-bit integers in a using saturation
346 INST3( psubusw, "psubusw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD9)) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation
348 // Note that the shift immediates share the same encoding between left and right-shift, and are distinguished by the Reg/Opcode,
349 // which is handled in emitxarch.cpp.
350 INST3( psrldq, "psrldq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), BAD_CODE ) // Shift right logical of xmm reg by given number of bytes
351 INST3( pslldq, "pslldq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), BAD_CODE ) // Shift left logical of xmm reg by given number of bytes
352 INST3( psllw, "psllw" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1)) // Packed shift left logical of 16-bit integers
353 INST3( pslld, "pslld" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2)) // Packed shift left logical of 32-bit integers
354 INST3( psllq, "psllq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3)) // Packed shift left logical of 64-bit integers
355 INST3( psrlw, "psrlw" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1)) // Packed shift right logical of 16-bit integers
356 INST3( psrld, "psrld" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2)) // Packed shift right logical of 32-bit integers
357 INST3( psrlq, "psrlq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3)) // Packed shift right logical of 64-bit integers
358 INST3( psraw, "psraw" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1)) // Packed shift right arithmetic of 16-bit integers
359 INST3( psrad, "psrad" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2)) // Packed shift right arithmetic of 32-bit integers
// Packed integer min/max and equality/greater-than comparisons.
361 INST3( pmaxub, "pmaxub" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDE)) // packed maximum unsigned bytes
362 INST3( pminub, "pminub" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDA)) // packed minimum unsigned bytes
363 INST3( pmaxsw, "pmaxsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEE)) // packed maximum signed words
364 INST3( pminsw, "pminsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEA)) // packed minimum signed words
365 INST3( pcmpeqd, "pcmpeqd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x76)) // Packed compare 32-bit integers for equality
366 INST3( pcmpgtd, "pcmpgtd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x66)) // Packed compare 32-bit signed integers for greater than
367 INST3( pcmpeqw, "pcmpeqw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x75)) // Packed compare 16-bit integers for equality
368 INST3( pcmpgtw, "pcmpgtw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x65)) // Packed compare 16-bit signed integers for greater than
369 INST3( pcmpeqb, "pcmpeqb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x74)) // Packed compare 8-bit integers for equality
370 INST3( pcmpgtb, "pcmpgtb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x64)) // Packed compare 8-bit signed integers for greater than
372 INST3( pshufd, "pshufd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x70)) // Packed shuffle of 32-bit integers
373 INST3( pshufhw, "pshufhw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x70)) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
374 INST3( pshuflw, "pshuflw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x70)) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
375 INST3( pextrw, "pextrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC5)) // Extract 16-bit value into a r32 with zero extended to 32-bits
376 INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // Insert word at index
378 INST3( punpckhbw, "punpckhbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x68)) // Packed logical (unsigned) widen ubyte to ushort (hi)
379 INST3( punpcklbw, "punpcklbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x60)) // Packed logical (unsigned) widen ubyte to ushort (lo)
380 INST3( punpckhqdq, "punpckhqdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6D)) // Interleave high quadwords of the two xmm operands
381 INST3( punpcklqdq, "punpcklqdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6C)) // Interleave low quadwords of the two xmm operands
382 INST3( punpckhwd, "punpckhwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x69)) // Packed logical (unsigned) widen ushort to uint (hi)
383 INST3( punpcklwd, "punpcklwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x61)) // Packed logical (unsigned) widen ushort to uint (lo)
384 INST3( unpckhpd, "unpckhpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x15)) // Interleave high doubles of the two xmm operands
385 INST3( unpcklpd, "unpcklpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x14)) // Interleave low doubles of the two xmm operands
387 INST3( packssdw, "packssdw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6B)) // Pack (narrow) int to short with saturation
388 INST3( packsswb, "packsswb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x63)) // Pack (narrow) short to byte with saturation
389 INST3( packuswb, "packuswb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x67)) // Pack (narrow) short to unsigned byte with saturation
390 #endif // !LEGACY_BACKEND
391 INST3(LAST_SSE2_INSTRUCTION, "LAST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
393 #ifndef LEGACY_BACKEND
394 INST3(FIRST_SSE4_INSTRUCTION, "FIRST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
395 // enum name FP updmode rf wf MR MI RM
396 INST3( dpps, "dpps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x40)) // Packed dot product of two float vector regs
397 INST3( dppd, "dppd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x41)) // Packed dot product of two double vector regs
398 INST3( insertps, "insertps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x21)) // Insert packed single precision float value
399 INST3( pcmpeqq, "pcmpeqq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x29)) // Packed compare 64-bit integers for equality
400 INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x37)) // Packed compare 64-bit signed integers for greater than
401 INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
402 INST3( ptest, "ptest" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x17)) // Packed logical compare
// NOTE(review): several entries below (phaddd, pabs*, palignr, pmaddubsw,
// pmulhrsw, pshufb, psign*) are SSSE3 rather than SSE4 — confirm the grouping
// under FIRST/LAST_SSE4_INSTRUCTION is intentional.
403 INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x02)) // Packed horizontal add
404 INST3( pabsb, "pabsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1C)) // Packed absolute value of bytes
405 INST3( pabsw, "pabsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1D)) // Packed absolute value of 16-bit integers
406 INST3( pabsd, "pabsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1E)) // Packed absolute value of 32-bit integers
407 INST3( palignr, "palignr" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0F)) // Packed Align Right
408 INST3( pmaddubsw, "pmaddubsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x04)) // Multiply and Add Packed Signed and Unsigned Bytes
409 INST3( pmulhrsw, "pmulhrsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0B)) // Packed Multiply High with Round and Scale
410 INST3( pshufb, "pshufb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x00)) // Packed Shuffle Bytes
411 INST3( psignb, "psignb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x08)) // Packed SIGN
412 INST3( psignw, "psignw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x09)) // Packed SIGN
413 INST3( psignd, "psignd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0A)) // Packed SIGN
414 INST3( pminsb, "pminsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x38)) // packed minimum signed bytes
415 INST3( pminsd, "pminsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x39)) // packed minimum 32-bit signed integers
416 INST3( pminuw, "pminuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3A)) // packed minimum 16-bit unsigned integers
417 INST3( pminud, "pminud" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3B)) // packed minimum 32-bit unsigned integers
418 INST3( pmaxsb, "pmaxsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3C)) // packed maximum signed bytes
419 INST3( pmaxsd, "pmaxsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3D)) // packed maximum 32-bit signed integers
420 INST3( pmaxuw, "pmaxuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3E)) // packed maximum 16-bit unsigned integers
421 INST3( pmaxud, "pmaxud" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3F)) // packed maximum 32-bit unsigned integers
422 INST3( pmovsxbw, "pmovsxbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x20)) // Packed sign extend byte to short
423 INST3( pmovsxbd, "pmovsxbd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x21)) // Packed sign extend byte to int
424 INST3( pmovsxbq, "pmovsxbq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x22)) // Packed sign extend byte to long
425 INST3( pmovsxwd, "pmovsxwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x23)) // Packed sign extend short to int
426 INST3( pmovsxwq, "pmovsxwq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x24)) // Packed sign extend short to long
427 INST3( pmovsxdq, "pmovsxdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x25)) // Packed sign extend int to long
428 INST3( pmovzxbw, "pmovzxbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x30)) // Packed zero extend byte to short
429 INST3( pmovzxbd, "pmovzxbd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x31)) // Packed zero extend byte to intg
430 INST3( pmovzxbq, "pmovzxbq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x32)) // Packed zero extend byte to lon
431 INST3( pmovzxwd, "pmovzxwd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x33)) // Packed zero extend short to int
432 INST3( pmovzxwq, "pmovzxwq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x34)) // Packed zero extend short to long
433 INST3( pmovzxdq, "pmovzxdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x35)) // Packed zero extend int to long
434 INST3( packusdw, "packusdw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2B)) // Pack (narrow) int to unsigned short with saturation
435 INST3( roundps, "roundps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x08)) // Round packed single precision floating-point values
436 INST3( roundss, "roundss" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0A)) // Round scalar single precision floating-point values
437 INST3( roundpd, "roundpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x09)) // Round packed double precision floating-point values
438 INST3( roundsd, "roundsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0B)) // Round scalar double precision floating-point values
439 INST3( pmuldq, "pmuldq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x28)) // packed multiply 32-bit signed integers and store 64-bit result
440 INST3( blendps, "blendps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0C)) // Blend Packed Single Precision Floating-Point Values
441 INST3( blendvps, "blendvps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x14)) // Variable Blend Packed Singles
442 INST3( blendpd, "blendpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0D)) // Blend Packed Double Precision Floating-Point Values
443 INST3( blendvpd, "blendvpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x15)) // Variable Blend Packed Doubles
444 INST3( pblendw, "pblendw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x0E)) // Blend Packed Words
445 INST3( pblendvb, "pblendvb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x10)) // Variable Blend Packed Bytes
446 INST3( phaddw, "phaddw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x01)) // Packed horizontal add of 16-bit integers
447 INST3( phsubw, "phsubw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x05)) // Packed horizontal subtract of 16-bit integers
448 INST3( phsubd, "phsubd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x06)) // Packed horizontal subtract of 32-bit integers
449 INST3( phaddsw, "phaddsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x03)) // Packed horizontal add of 16-bit integers with saturation
450 INST3( phsubsw, "phsubsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x07)) // Packed horizontal subtract of 16-bit integers with saturation
451 INST3( lddqu, "lddqu" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xF0)) // Load Unaligned integer
452 INST3( movntdqa, "movntdqa" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2A)) // Load Double Quadword Non-Temporal Aligned Hint
453 INST3( movddup, "movddup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x12)) // Replicate Double FP Values
454 INST3( movsldup, "movsldup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x12)) // Replicate even-indexed Single FP Values
455 INST3( movshdup, "movshdup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x16)) // Replicate odd-indexed Single FP Values
456 INST3( phminposuw, "phminposuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x41)) // Packed Horizontal Word Minimum
457 INST3( mpsadbw, "mpsadbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x42)) // Compute Multiple Packed Sums of Absolute Difference
458 INST3( pinsrb, "pinsrb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x20)) // Insert Byte
459 INST3( pinsrd, "pinsrd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x22)) // Insert Dword
460 INST3( pinsrq, "pinsrq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x22)) // Insert Qword
461 INST3( pextrb, "pextrb" , 0, IUM_WR, 0, 0, SSE3A(0x14), BAD_CODE, BAD_CODE) // Extract Byte
462 INST3( pextrd, "pextrd" , 0, IUM_WR, 0, 0, SSE3A(0x16), BAD_CODE, BAD_CODE) // Extract Dword
463 INST3( pextrq, "pextrq" , 0, IUM_WR, 0, 0, SSE3A(0x16), BAD_CODE, BAD_CODE) // Extract Qword
464 INST3( pextrw_sse41, "pextrw" , 0, IUM_WR, 0, 0, SSE3A(0x15), BAD_CODE, BAD_CODE) // Extract Word
465 INST3( extractps, "extractps" , 0, IUM_WR, 0, 0, SSE3A(0x17), BAD_CODE, BAD_CODE) // Extract Packed Floating-Point Values
467 INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
469 INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
470 // AVX only instructions
471 INST3( vbroadcastss, "broadcastss" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x18)) // Broadcast float value read from memory to entire ymm register
472 INST3( vbroadcastsd, "broadcastsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x19)) // Broadcast double value read from memory to entire ymm register
473 INST3( vpbroadcastb, "pbroadcastb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x78)) // Broadcast int8 value from reg/memory to entire ymm register
474 INST3( vpbroadcastw, "pbroadcastw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x79)) // Broadcast int16 value from reg/memory to entire ymm register
475 INST3( vpbroadcastd, "pbroadcastd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x58)) // Broadcast int32 value from reg/memory to entire ymm register
476 INST3( vpbroadcastq, "pbroadcastq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x59)) // Broadcast int64 value from reg/memory to entire ymm register
477 INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, BAD_CODE) // Extract 128-bit packed floating point values
478 INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39), BAD_CODE, BAD_CODE) // Extract 128-bit packed integer values
479 INST3( vinsertf128, "insertf128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x18)) // Insert 128-bit packed floating point values
480 INST3( vinserti128, "inserti128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x38)) // Insert 128-bit packed integer values
481 INST3( vzeroupper, "zeroupper" , 0, IUM_WR, 0, 0, 0xC577F8, BAD_CODE, BAD_CODE) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
482 INST3( vperm2i128, "perm2i128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x46)) // Permute 128-bit halves of input register
483 INST3( vpermq, "permq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x00)) // Permute 64-bit elements of input register
484 INST3( vblendvps, "blendvps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x4A)) // Variable Blend Packed Singles
485 INST3( vblendvpd, "blendvpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x4B)) // Variable Blend Packed Doubles
486 INST3( vpblendvb, "pblendvb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x4C)) // Variable Blend Packed Bytes
487 INST3( vtestps, "testps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0E)) // Packed Bit Test
488 INST3( vtestpd, "testpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0F)) // Packed Bit Test
489 INST3( vpsrlvd, "psrlvd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x45)) // Variable Bit Shift Right Logical
490 INST3( vpsrlvq, "psrlvq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x45)) // Variable Bit Shift Right Logical (same opcode byte as psrlvd; VEX.W selects the qword form)
491 INST3( vpsravd, "psravd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x46)) // Variable Bit Shift Right Arithmetic
492 INST3( vpsllvd, "psllvd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x47)) // Variable Bit Shift Left Logical
493 INST3( vpsllvq, "psllvq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x47)) // Variable Bit Shift Left Logical (same opcode byte as psllvd; VEX.W selects the qword form)
494 INST3( vpermilps, "permilps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x04)) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
495 INST3( vpermilpd, "permilpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x05)) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
496 INST3( vpermilpsvar, "permilpsvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0C)) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
497 INST3( vpermilpdvar, "permilpdvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0D)) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
498 INST3( vperm2f128, "perm2f128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x06)) // Permute Floating-Point Values
499 INST3(vbroadcastf128,"broadcastf128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1A)) // Broadcast packed float values read from memory to entire ymm register
500 INST3(vbroadcasti128,"broadcasti128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x5A)) // Broadcast packed integer values read from memory to entire ymm register
501 INST3(vmaskmovps, "maskmovps" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2C)) // Conditional SIMD Packed Loads Float
502 INST3(vmaskmovpd, "maskmovpd" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2D)) // Conditional SIMD Packed Loads Double
503 INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
505 // Scalar instructions in SSE4.2
506 INST3( crc32, "crc32" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0)) // Accumulate CRC32 value (F2 0F 38 F0 encoding spelled out via PACK4)
509 INST3( lzcnt, "lzcnt" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xBD)) // Count leading zero bits
512 INST3( popcnt, "popcnt" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xB8)) // Count set bits
513 #endif // !LEGACY_BACKEND
514 // enum name FP updmode rf wf R/M,R/M[reg] R/M,icon
516 INST2(ret , "ret" , 0, IUM_RD, 0, 0, 0x0000C3, 0x0000C2)
517 INST2(loop , "loop" , 0, IUM_RD, 0, 0, BAD_CODE, 0x0000E2)
// NOTE(review): wf=1 marks "call" as writing flags (callee may clobber them) -- confirm this is intentional
518 INST2(call , "call" , 0, IUM_RD, 0, 1, 0x0010FF, 0x0000E8)
// Rotates/shifts come in three encodings each: by CL (base), by 1 (_1), and by imm8 (_N)
520 INST2(rol , "rol" , 0, IUM_RW, 0, 1, 0x0000D2, BAD_CODE)
521 INST2(rol_1 , "rol" , 0, IUM_RW, 0, 1, 0x0000D0, 0x0000D0)
522 INST2(rol_N , "rol" , 0, IUM_RW, 0, 1, 0x0000C0, 0x0000C0)
523 INST2(ror , "ror" , 0, IUM_RW, 0, 1, 0x0008D2, BAD_CODE)
524 INST2(ror_1 , "ror" , 0, IUM_RW, 0, 1, 0x0008D0, 0x0008D0)
525 INST2(ror_N , "ror" , 0, IUM_RW, 0, 1, 0x0008C0, 0x0008C0)
// rcl/rcr rotate through CF, hence rf=1 (they read flags) unlike rol/ror
527 INST2(rcl , "rcl" , 0, IUM_RW, 1, 1, 0x0010D2, BAD_CODE)
528 INST2(rcl_1 , "rcl" , 0, IUM_RW, 1, 1, 0x0010D0, 0x0010D0)
529 INST2(rcl_N , "rcl" , 0, IUM_RW, 1, 1, 0x0010C0, 0x0010C0)
530 INST2(rcr , "rcr" , 0, IUM_RW, 1, 1, 0x0018D2, BAD_CODE)
531 INST2(rcr_1 , "rcr" , 0, IUM_RW, 1, 1, 0x0018D0, 0x0018D0)
532 INST2(rcr_N , "rcr" , 0, IUM_RW, 1, 1, 0x0018C0, 0x0018C0)
533 INST2(shl , "shl" , 0, IUM_RW, 0, 1, 0x0020D2, BAD_CODE)
534 INST2(shl_1 , "shl" , 0, IUM_RW, 0, 1, 0x0020D0, 0x0020D0)
535 INST2(shl_N , "shl" , 0, IUM_RW, 0, 1, 0x0020C0, 0x0020C0)
536 INST2(shr , "shr" , 0, IUM_RW, 0, 1, 0x0028D2, BAD_CODE)
537 INST2(shr_1 , "shr" , 0, IUM_RW, 0, 1, 0x0028D0, 0x0028D0)
538 INST2(shr_N , "shr" , 0, IUM_RW, 0, 1, 0x0028C0, 0x0028C0)
539 INST2(sar , "sar" , 0, IUM_RW, 0, 1, 0x0038D2, BAD_CODE)
540 INST2(sar_1 , "sar" , 0, IUM_RW, 0, 1, 0x0038D0, 0x0038D0)
541 INST2(sar_N , "sar" , 0, IUM_RW, 0, 1, 0x0038C0, 0x0038C0)
544 // enum name FP updmode rf wf R/M,R/M[reg]
546 INST1(r_movsb, "rep movsb" , 0, IUM_RD, 0, 0, 0x00A4F3)
547 INST1(r_movsd, "rep movsd" , 0, IUM_RD, 0, 0, 0x00A5F3)
548 #if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
549 INST1(r_movsq, "rep movsq" , 0, IUM_RD, 0, 0, 0xF3A548)
550 #endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
551 INST1(movsb , "movsb" , 0, IUM_RD, 0, 0, 0x0000A4)
552 INST1(movsd , "movsd" , 0, IUM_RD, 0, 0, 0x0000A5)
553 #if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
554 INST1(movsq, "movsq" , 0, IUM_RD, 0, 0, 0x00A548)
555 #endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
557 INST1(r_stosb, "rep stosb" , 0, IUM_RD, 0, 0, 0x00AAF3)
558 INST1(r_stosd, "rep stosd" , 0, IUM_RD, 0, 0, 0x00ABF3)
559 #if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
560 INST1(r_stosq, "rep stosq" , 0, IUM_RD, 0, 0, 0xF3AB48)
561 #endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
562 INST1(stosb, "stosb" , 0, IUM_RD, 0, 0, 0x0000AA)
563 INST1(stosd, "stosd" , 0, IUM_RD, 0, 0, 0x0000AB)
564 #if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
565 INST1(stosq, "stosq" , 0, IUM_RD, 0, 0, 0x00AB48)
566 #endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
568 INST1(int3 , "int3" , 0, IUM_RD, 0, 0, 0x0000CC)
569 INST1(nop , "nop" , 0, IUM_RD, 0, 0, 0x000090)
570 INST1(lock , "lock" , 0, IUM_RD, 0, 0, 0x0000F0)
571 INST1(leave , "leave" , 0, IUM_RD, 0, 0, 0x0000C9)
574 INST1(neg , "neg" , 0, IUM_RW, 0, 1, 0x0018F6)
575 INST1(not , "not" , 0, IUM_RW, 0, 1, 0x0010F6)
577 INST1(cdq , "cdq" , 0, IUM_RD, 0, 1, 0x000099)
578 INST1(idiv , "idiv" , 0, IUM_RD, 0, 1, 0x0038F6)
579 INST1(imulEAX, "imul" , 0, IUM_RD, 0, 1, 0x0028F6) // edx:eax = eax*op1
580 INST1(div , "div" , 0, IUM_RD, 0, 1, 0x0030F6)
581 INST1(mulEAX , "mul" , 0, IUM_RD, 0, 1, 0x0020F6)
583 INST1(sahf , "sahf" , 0, IUM_RD, 0, 1, 0x00009E)
585 INST1(xadd , "xadd" , 0, IUM_RW, 0, 1, 0x0F00C0)
586 INST1(cmpxchg, "cmpxchg" , 0, IUM_RW, 0, 1, 0x0F00B0)
588 INST1(shld , "shld" , 0, IUM_RW, 0, 1, 0x0F00A4)
589 INST1(shrd , "shrd" , 0, IUM_RW, 0, 1, 0x0F00AC)
591 // For RyuJIT/x86, we follow the x86 calling convention that requires
592 // us to return floating point value on the x87 FP stack, so we need
593 // these instructions regardless of whether we're using full stack fp.
594 #if defined(_TARGET_X86_)
595 INST1(fld , "fld" , 1, IUM_WR, 0, 0, 0x0000D9)
596 INST1(fstp , "fstp" , 1, IUM_WR, 0, 0, 0x0018D9)
597 #endif // _TARGET_X86
599 #if FEATURE_STACK_FP_X87
// Legacy x87 stack FP instructions, only built when the stack-FP code generator is enabled
600 INST1(fnstsw , "fnstsw" , 1, IUM_WR, 1, 0, 0x0020DF)
601 INST1(fcom , "fcom" , 1, IUM_RD, 0, 1, 0x0010D8)
602 INST1(fcomp , "fcomp" , 1, IUM_RD, 0, 1, 0x0018D8)
603 INST1(fcompp , "fcompp" , 1, IUM_RD, 0, 1, 0x00D9DE)
604 INST1(fcomi , "fcomi" , 1, IUM_RD, 0, 1, 0x00F0DB)
605 INST1(fcomip , "fcomip" , 1, IUM_RD, 0, 1, 0x00F0DF)
607 INST1(fchs , "fchs" , 1, IUM_RW, 0, 1, 0x00E0D9)
608 INST1(fabs , "fabs" , 1, IUM_RW, 0, 1, 0x00E1D9)
609 INST1(fsin , "fsin" , 1, IUM_RW, 0, 1, 0x00FED9)
610 INST1(fcos , "fcos" , 1, IUM_RW, 0, 1, 0x00FFD9)
611 INST1(fsqrt , "fsqrt" , 1, IUM_RW, 0, 1, 0x00FAD9)
612 INST1(fldl2e , "fldl2e" , 1, IUM_RW, 0, 1, 0x00EAD9)
613 INST1(frndint, "frndint" , 1, IUM_RW, 0, 1, 0x00FCD9)
614 INST1(f2xm1 , "f2xm1" , 1, IUM_RW, 0, 1, 0x00F0D9)
615 INST1(fscale , "fscale" , 1, IUM_RW, 0, 1, 0x00FDD9)
617 INST1(fld1 , "fld1" , 1, IUM_WR, 0, 0, 0x00E8D9)
618 INST1(fldz , "fldz" , 1, IUM_WR, 0, 0, 0x00EED9)
619 INST1(fst , "fst" , 1, IUM_WR, 0, 0, 0x0010D9)
621 INST1(fadd , "fadd" , 1, IUM_RW, 0, 0, 0x0000D8)
622 INST1(faddp , "faddp" , 1, IUM_RW, 0, 0, 0x0000DA)
623 INST1(fsub , "fsub" , 1, IUM_RW, 0, 0, 0x0020D8)
624 INST1(fsubp , "fsubp" , 1, IUM_RW, 0, 0, 0x0028DA)
625 INST1(fsubr , "fsubr" , 1, IUM_RW, 0, 0, 0x0028D8)
626 INST1(fsubrp , "fsubrp" , 1, IUM_RW, 0, 0, 0x0020DA)
627 INST1(fmul , "fmul" , 1, IUM_RW, 0, 0, 0x0008D8)
628 INST1(fmulp , "fmulp" , 1, IUM_RW, 0, 0, 0x0008DA)
629 INST1(fdiv , "fdiv" , 1, IUM_RW, 0, 0, 0x0030D8)
630 INST1(fdivp , "fdivp" , 1, IUM_RW, 0, 0, 0x0038DA)
631 INST1(fdivr , "fdivr" , 1, IUM_RW, 0, 0, 0x0038D8)
632 INST1(fdivrp , "fdivrp" , 1, IUM_RW, 0, 0, 0x0030DA)
634 INST1(fxch , "fxch" , 1, IUM_RW, 0, 0, 0x00C8D9)
635 INST1(fprem , "fprem" , 0, IUM_RW, 0, 1, 0x00F8D9) // NOTE(review): fp column is 0 here though every sibling x87 entry uses 1 -- confirm intentional
637 INST1(fild , "fild" , 1, IUM_RD, 0, 0, 0x0000DB)
638 INST1(fildl , "fild" , 1, IUM_RD, 0, 0, 0x0028DB)
639 INST1(fistp , "fistp" , 1, IUM_WR, 0, 0, 0x0018DB)
640 INST1(fistpl , "fistp" , 1, IUM_WR, 0, 0, 0x0038DB)
642 INST1(fldcw , "fldcw" , 1, IUM_RD, 0, 0, 0x0028D9)
643 INST1(fnstcw , "fnstcw" , 1, IUM_WR, 0, 0, 0x0038D9)
644 #endif // FEATURE_STACK_FP_X87
// setcc family: write a byte (0 or 1) based on condition flags; all entries read flags (rf=1)
646 INST1(seto , "seto" , 0, IUM_WR, 1, 0, 0x0F0090)
647 INST1(setno , "setno" , 0, IUM_WR, 1, 0, 0x0F0091)
648 INST1(setb , "setb" , 0, IUM_WR, 1, 0, 0x0F0092)
649 INST1(setae , "setae" , 0, IUM_WR, 1, 0, 0x0F0093)
650 INST1(sete , "sete" , 0, IUM_WR, 1, 0, 0x0F0094)
651 INST1(setne , "setne" , 0, IUM_WR, 1, 0, 0x0F0095)
652 INST1(setbe , "setbe" , 0, IUM_WR, 1, 0, 0x0F0096)
653 INST1(seta , "seta" , 0, IUM_WR, 1, 0, 0x0F0097)
654 INST1(sets , "sets" , 0, IUM_WR, 1, 0, 0x0F0098)
655 INST1(setns , "setns" , 0, IUM_WR, 1, 0, 0x0F0099)
656 INST1(setpe , "setpe" , 0, IUM_WR, 1, 0, 0x0F009A)
657 INST1(setpo , "setpo" , 0, IUM_WR, 1, 0, 0x0F009B)
658 INST1(setl , "setl" , 0, IUM_WR, 1, 0, 0x0F009C)
659 INST1(setge , "setge" , 0, IUM_WR, 1, 0, 0x0F009D)
660 INST1(setle , "setle" , 0, IUM_WR, 1, 0, 0x0F009E)
661 INST1(setg , "setg" , 0, IUM_WR, 1, 0, 0x0F009F)
663 #ifdef _TARGET_AMD64_
664 // A jump with rex prefix. This is used for register indirect
665 // tail calls.
666 INST1(rex_jmp, "rex.jmp" , 0, IUM_RD, 0, 0, 0x0020FE)
667 #endif
669 INST1(i_jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0020FE) // Register-indirect jump (FF /4); available on all targets
// Short-form jumps: one-byte opcode with an 8-bit relative displacement
671 INST0(jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0000EB)
672 INST0(jo , "jo" , 0, IUM_RD, 1, 0, 0x000070)
673 INST0(jno , "jno" , 0, IUM_RD, 1, 0, 0x000071)
674 INST0(jb , "jb" , 0, IUM_RD, 1, 0, 0x000072)
675 INST0(jae , "jae" , 0, IUM_RD, 1, 0, 0x000073)
676 INST0(je , "je" , 0, IUM_RD, 1, 0, 0x000074)
677 INST0(jne , "jne" , 0, IUM_RD, 1, 0, 0x000075)
678 INST0(jbe , "jbe" , 0, IUM_RD, 1, 0, 0x000076)
679 INST0(ja , "ja" , 0, IUM_RD, 1, 0, 0x000077)
680 INST0(js , "js" , 0, IUM_RD, 1, 0, 0x000078)
681 INST0(jns , "jns" , 0, IUM_RD, 1, 0, 0x000079)
682 INST0(jpe , "jpe" , 0, IUM_RD, 1, 0, 0x00007A)
683 INST0(jpo , "jpo" , 0, IUM_RD, 1, 0, 0x00007B)
684 INST0(jl , "jl" , 0, IUM_RD, 1, 0, 0x00007C)
685 INST0(jge , "jge" , 0, IUM_RD, 1, 0, 0x00007D)
686 INST0(jle , "jle" , 0, IUM_RD, 1, 0, 0x00007E)
687 INST0(jg , "jg" , 0, IUM_RD, 1, 0, 0x00007F)
// Long-form (l_) jumps: 32-bit relative displacement; conditional forms use the 0F-escape opcodes
689 INST0(l_jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0000E9)
690 INST0(l_jo , "jo" , 0, IUM_RD, 1, 0, 0x00800F)
691 INST0(l_jno , "jno" , 0, IUM_RD, 1, 0, 0x00810F)
692 INST0(l_jb , "jb" , 0, IUM_RD, 1, 0, 0x00820F)
693 INST0(l_jae , "jae" , 0, IUM_RD, 1, 0, 0x00830F)
694 INST0(l_je , "je" , 0, IUM_RD, 1, 0, 0x00840F)
695 INST0(l_jne , "jne" , 0, IUM_RD, 1, 0, 0x00850F)
696 INST0(l_jbe , "jbe" , 0, IUM_RD, 1, 0, 0x00860F)
697 INST0(l_ja , "ja" , 0, IUM_RD, 1, 0, 0x00870F)
698 INST0(l_js , "js" , 0, IUM_RD, 1, 0, 0x00880F)
699 INST0(l_jns , "jns" , 0, IUM_RD, 1, 0, 0x00890F)
700 INST0(l_jpe , "jpe" , 0, IUM_RD, 1, 0, 0x008A0F)
701 INST0(l_jpo , "jpo" , 0, IUM_RD, 1, 0, 0x008B0F)
702 INST0(l_jl , "jl" , 0, IUM_RD, 1, 0, 0x008C0F)
703 INST0(l_jge , "jge" , 0, IUM_RD, 1, 0, 0x008D0F)
704 INST0(l_jle , "jle" , 0, IUM_RD, 1, 0, 0x008E0F)
705 INST0(l_jg , "jg" , 0, IUM_RD, 1, 0, 0x008F0F)
707 INST0(align , "align" , 0, IUM_RD, 0, 0, BAD_CODE) // Pseudo-instruction: alignment directive, emits no fixed encoding (BAD_CODE)
709 /*****************************************************************************/
716 /*****************************************************************************/