4 dnl m4 macros for x86 assembler.
7 dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
10 dnl This file is part of the GNU MP Library.
12 dnl The GNU MP Library is free software; you can redistribute it and/or
13 dnl modify it under the terms of the GNU Lesser General Public License as
14 dnl published by the Free Software Foundation; either version 3 of the
15 dnl License, or (at your option) any later version.
17 dnl The GNU MP Library is distributed in the hope that it will be useful,
18 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
19 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 dnl Lesser General Public License for more details.
22 dnl You should have received a copy of the GNU Lesser General Public License
23 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
28 dnl m4 isn't perfect for processing BSD style x86 assembler code, the main
31 dnl 1. Doing define(foo,123) and then using foo in an addressing mode like
32 dnl foo(%ebx) expands as a macro rather than a constant. This is worked
33 dnl around by using deflit() from asm-defs.m4, instead of define().
35 dnl 2. Immediates in macro definitions need a space or `' to stop the $
36 dnl looking like a macro parameter. For example,
38 dnl define(foo, `mov $ 123, %eax')
40 dnl This is only a problem in macro definitions, not in ordinary text,
41 dnl and not in macro parameters like text passed to forloop() or ifdef().
44 deflit(BYTES_PER_MP_LIMB, 4)
47 dnl Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL. We
48 dnl undefine PIC since we don't need to be position independent in this
49 dnl case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.
51 ifdef(`DLL_EXPORT',`undefine(`PIC')')
54 dnl Usage: CPUVEC_FUNCS_LIST
56 dnl A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
57 dnl order they appear in that structure.
59 define(CPUVEC_FUNCS_LIST,
82 dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
84 dnl In the x86 code we use explicit TEXT and ALIGN() calls in the code,
85 dnl since different alignments are wanted in various circumstances. So for
90 dnl PROLOGUE(mpn_add_n)
94 define(`PROLOGUE_cpu',
96 m4_assert_defined(`WANT_PROFILING')
101 ifelse(WANT_PROFILING,`prof', ` call_mcount')
102 ifelse(WANT_PROFILING,`gprof', ` call_mcount')
103 ifelse(WANT_PROFILING,`instrument',` call_instrument(enter)')
107 dnl Usage: COFF_TYPE(GSYM_PREFIX`'foo)
109 dnl Emit COFF style ".def ... .endef" type information for a function, when
110 dnl supported. The argument should include any GSYM_PREFIX.
112 dnl See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
116 m4_assert_defined(`HAVE_COFF_TYPE')
117 `ifelse(HAVE_COFF_TYPE,yes,
124 dnl Usage: call_mcount
126 dnl For `gprof' style profiling, %ebp is set up as a frame pointer. None of
127 dnl the assembler routines use %ebp this way, so it's done only for the
128 dnl benefit of mcount. glibc sysdeps/i386/i386-mcount.S shows how mcount
129 dnl gets the current function from (%esp) and the parent from 4(%ebp).
131 dnl For `prof' style profiling gcc generates mcount calls without setting
132 dnl up %ebp, and the same is done here.
134 define(`call_mcount',
135 m4_assert_numargs(-1)
136 m4_assert_defined(`WANT_PROFILING')
137 m4_assert_defined(`MCOUNT_PIC_REG')
138 m4_assert_defined(`MCOUNT_NONPIC_REG')
139 m4_assert_defined(`MCOUNT_PIC_CALL')
140 m4_assert_defined(`MCOUNT_NONPIC_CALL')
141 `ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
144 L(mcount_data_`'mcount_counter):
148 ifelse(WANT_PROFILING,`gprof',
155 L(mcount_here_`'mcount_counter):
156 addl $_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
157 ifelse(MCOUNT_PIC_REG,,,
158 ` leal L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
162 ifelse(MCOUNT_NONPIC_REG,,,
163 ` movl `$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
167 ifelse(WANT_PROFILING,`gprof',
170 define(`mcount_counter',incr(mcount_counter))
173 define(mcount_counter,1)
176 dnl Usage: call_instrument(enter|exit)
178 dnl Call __cyg_profile_func_enter or __cyg_profile_func_exit.
180 dnl For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
181 dnl so %ebx is just set up for these calls. It's a bit wasteful to repeat
182 dnl the setup for the exit call having done it earlier for the enter, but
183 dnl there's nowhere very convenient to hold %ebx through the length of a
184 dnl routine, in general.
186 dnl For PIC, because instrument_current_function will be within the current
187 dnl object file we can get it just as an offset from %eip, there's no need
190 dnl No attempt is made to maintain the stack alignment gcc generates with
191 dnl -mpreferred-stack-boundary. This wouldn't be hard, but it seems highly
192 dnl unlikely the instrumenting functions would be doing anything that'd
193 dnl benefit from alignment, in particular they're unlikely to be using
194 dnl doubles or long doubles on the stack.
196 dnl The FRAME scheme is used to conveniently account for the register saves
197 dnl before accessing the return address. Any previous value is saved and
198 dnl restored, since plenty of code keeps a value across a "ret" in the
199 dnl middle of a routine.
201 define(call_instrument,
205 ` pushl %eax FRAME_pushl() C return value
208 ` pushl %ebx FRAME_pushl()
210 L(instrument_here_`'instrument_count):
212 addl $_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
213 C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
214 addl $instrument_current_function-L(instrument_here_`'instrument_count), %ecx
215 pushl m4_empty_if_zero(FRAME)(%esp) FRAME_pushl() C return addr
216 pushl %ecx FRAME_pushl() C this function
217 call GSYM_PREFIX`'__cyg_profile_func_$1@PLT
222 pushl m4_empty_if_zero(FRAME)(%esp) FRAME_pushl() C return addr
223 pushl $instrument_current_function FRAME_pushl() C this function
224 call GSYM_PREFIX`'__cyg_profile_func_$1
228 ` popl %eax C return value
231 define(`instrument_count',incr(instrument_count))
233 define(instrument_count,1)
236 dnl Usage: instrument_current_function
238 dnl Return the current function name for instrumenting purposes. This is
239 dnl PROLOGUE_current_function, but it sticks at the first such name seen.
241 dnl Sticking to the first name seen ensures that multiple-entrypoint
242 dnl functions like mpn_add_nc and mpn_add_n will make enter and exit calls
243 dnl giving the same function address.
dnl The first expansion stores the then-current PROLOGUE_current_function in
dnl instrument_current_function_seen (it is unquoted in the inner define, so
dnl its value is captured at that moment); later expansions return the
dnl stored name instead of looking at PROLOGUE_current_function again.
245 define(instrument_current_function,
246 m4_assert_numargs(-1)
247 `ifdef(`instrument_current_function_seen',
248 `instrument_current_function_seen',
249 `define(`instrument_current_function_seen',PROLOGUE_current_function)dnl
250 PROLOGUE_current_function')')
253 dnl Usage: call_movl_eip_to_ebx
255 dnl Generate a call to L(movl_eip_to_ebx), and record the need for that
dnl Besides emitting the call, each use sets movl_eip_to_ebx_needed to 1;
dnl generate_movl_eip_to_ebx tests that flag before emitting the routine.
258 define(call_movl_eip_to_ebx,
259 m4_assert_numargs(-1)
260 `call L(movl_eip_to_ebx)
261 define(`movl_eip_to_ebx_needed',1)')
263 dnl Usage: generate_movl_eip_to_ebx
265 dnl Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
267 define(generate_movl_eip_to_ebx,
268 m4_assert_numargs(-1)
269 `ifelse(movl_eip_to_ebx_needed,1,
270 `ifelse(movl_eip_to_ebx_done,1,,
274 define(`movl_eip_to_ebx_done',1)
280 dnl Generate a "ret", but if doing instrumented profiling then call
281 dnl __cyg_profile_func_exit first.
284 m4_assert_numargs(-1)
285 m4_assert_defined(`WANT_PROFILING')
286 `ifelse(WANT_PROFILING,instrument,
289 generate_movl_eip_to_ebx
293 dnl Usage: ret_internal
295 dnl A plain "ret", without any __cyg_profile_func_exit call. This can be
296 dnl used for a return which is internal to some function, such as when
297 dnl getting %eip for PIC.
300 m4_assert_numargs(-1)
304 dnl Usage: ret_instrument
306 dnl Generate call to __cyg_profile_func_exit and then a ret. If a ret has
307 dnl already been seen from this function then jump to that chunk of code,
308 dnl rather than emitting it again.
310 define(ret_instrument,
311 m4_assert_numargs(-1)
312 `ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
313 `jmp L(instrument_exit_`'instrument_current_function)',
314 `define(ret_instrument_seen_`'instrument_current_function,1)
315 L(instrument_exit_`'instrument_current_function):
316 call_instrument(exit)
320 dnl Usage: _GLOBAL_OFFSET_TABLE_
322 dnl Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
323 dnl This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
324 dnl work with systems requiring an extra underscore such as OpenBSD.
326 dnl deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
327 dnl out right, though that form doesn't work properly in gas (see
dnl The expansion is GOT_GSYM_PREFIX followed by the symbol name.  The name
dnl is quoted inside the body so that it comes out as literal text when the
dnl body is rescanned, rather than expanding this macro again.
330 deflit(_GLOBAL_OFFSET_TABLE_,
331 m4_assert_defined(`GOT_GSYM_PREFIX')
332 `GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')
335 dnl --------------------------------------------------------------------------
336 dnl Various x86 macros.
340 dnl Usage: ALIGN_OFFSET(bytes,offset)
342 dnl Align to `offset' away from a multiple of `bytes'.
344 dnl This is useful for testing, for example align to something very strict
345 dnl and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
347 dnl Generally you wouldn't execute across the padding, but it's done with
348 dnl nop's so it'll work.
353 forloop(`i',1,$2,` nop
357 dnl Usage: defframe(name,offset)
359 dnl Make a definition like the following with which to access a parameter
360 dnl or variable on the stack.
362 dnl define(name,`FRAME+offset(%esp)')
364 dnl Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
365 dnl byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
366 dnl Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
367 dnl zero offset is wanted.
369 dnl The new macro also gets a check that when it's used FRAME is actually
370 dnl defined, and that the final %esp offset isn't negative, which would
371 dnl mean an attempt to access something below the current %esp.
373 dnl deflit() is used rather than a plain define(), so the new macro won't
374 dnl delete any following parenthesized expression. name(%edi) will come
375 dnl out say as 16(%esp)(%edi). This isn't valid assembler and should
376 dnl provoke an error, which is better than silently giving just 16(%esp).
378 dnl See README for more on the suggested way to access the stack frame.
383 m4_assert_defined(`FRAME')
384 `defframe_check_notbelow(`$1',$2,FRAME)dnl
385 defframe_empty_if_zero(FRAME+($2))(%esp)')')
387 dnl Called: defframe_empty_if_zero(expression)
388 define(defframe_empty_if_zero,
390 `ifelse(defframe_empty_if_zero_disabled,1,
392 `m4_empty_if_zero($1)')')
394 dnl Called: defframe_check_notbelow(`name',offset,FRAME)
395 define(defframe_check_notbelow,
397 `ifelse(eval(($3)+($2)<0),1,
398 `m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
402 dnl Usage: FRAME_pushl()
404 dnl FRAME_addl_esp(n)
405 dnl FRAME_subl_esp(n)
407 dnl Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
410 dnl Using these macros is completely optional. Sometimes it makes more
411 dnl sense to put explicit deflit(`FRAME',N) forms, especially when there are
412 dnl jumps and different sequences of FRAME values need to be used in
413 dnl different places.
417 m4_assert_defined(`FRAME')
418 `deflit(`FRAME',eval(FRAME+4))')
422 m4_assert_defined(`FRAME')
423 `deflit(`FRAME',eval(FRAME-4))')
425 define(FRAME_addl_esp,
427 m4_assert_defined(`FRAME')
428 `deflit(`FRAME',eval(FRAME-($1)))')
430 define(FRAME_subl_esp,
432 m4_assert_defined(`FRAME')
433 `deflit(`FRAME',eval(FRAME+($1)))')
436 dnl Usage: defframe_pushl(name)
438 dnl Do a combination FRAME_pushl() and a defframe() to name the stack
439 dnl location just pushed. This should come after a pushl instruction.
440 dnl Putting it on the same line works and avoids lengthening the code. For
443 dnl pushl %eax defframe_pushl(VAR_COUNTER)
445 dnl Notice the defframe() is done with an unquoted -FRAME thus giving its
446 dnl current value without tracking future changes.
448 define(defframe_pushl,
450 `FRAME_pushl()defframe(`$1',-FRAME)')
453 dnl --------------------------------------------------------------------------
454 dnl Assembler instruction macros.
458 dnl Usage: emms_or_femms
459 dnl femms_available_p
461 dnl femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
462 dnl femms instruction is available. emms_or_femms expands to femms if
463 dnl available, or emms if not.
465 dnl emms_or_femms is meant for use in the K6 directory where plain K6
466 dnl (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
467 dnl supported together.
469 dnl On K7 femms is no longer faster and is just an alias for emms, so plain
470 dnl emms may as well be used.
472 define(femms_available_p,
473 m4_assert_numargs(-1)
477 `HAVE_HOST_CPU_athlon')')
dnl femms_available_p sits inside the quoted body, so it is evaluated at
dnl each use of emms_or_femms rather than when this definition is read.
479 define(emms_or_femms,
480 m4_assert_numargs(-1)
481 `ifelse(femms_available_p,1,`femms',`emms')')
486 dnl Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
487 dnl following is a replacement using .byte.
490 m4_assert_numargs(-1)
491 `.byte 15,14 C AMD 3DNow femms')
494 dnl Usage: jadcl0(op)
496 dnl Generate a jnc/incl as a substitute for adcl $0,op. Note this isn't an
497 dnl exact replacement, since it doesn't set the flags like adcl does.
499 dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
500 dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch
501 dnl misprediction penalty is small, and the multiply algorithm used leads
502 dnl to a carry bit on average only 1/4 of the time.
504 dnl jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
505 dnl for comparison. For example,
507 dnl define(`jadcl0_disabled',1)
509 dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
510 dnl the same size as an adcl. This makes it possible to use the exact same
511 dnl computed jump code when testing the relative speed of the two.
515 `ifelse(jadcl0_disabled,1,
517 `jnc L(jadcl0_`'jadcl0_counter)
519 L(jadcl0_`'jadcl0_counter):
520 define(`jadcl0_counter',incr(jadcl0_counter))')')
522 define(jadcl0_counter,1)
525 dnl Usage: x86_lookup(target, key,value, key,value, ...)
526 dnl x86_lookup_p(target, key,value, key,value, ...)
528 dnl Look for `target' among the `key' parameters.
530 dnl x86_lookup expands to the corresponding `value', or generates an error
531 dnl if `target' isn't found.
533 dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not.
536 m4_assert_numargs_range(1,999)
537 `ifelse(eval($#<3),1,
538 `m4_error(`unrecognised part of x86 instruction: $1
540 `ifelse(`$1',`$2', `$3',
541 `x86_lookup(`$1',shift(shift(shift($@))))')')')
544 m4_assert_numargs_range(1,999)
545 `ifelse(eval($#<3),1, `0',
546 `ifelse(`$1',`$2', `1',
547 `x86_lookup_p(`$1',shift(shift(shift($@))))')')')
550 dnl Usage: x86_opcode_reg32(reg)
551 dnl x86_opcode_reg32_p(reg)
553 dnl x86_opcode_reg32 expands to the standard 3 bit encoding for the given
554 dnl 32-bit register, eg. `%ebp' turns into 5.
556 dnl x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
559 define(x86_opcode_reg32,
561 `x86_lookup(`$1',x86_opcode_reg32_list)')
563 define(x86_opcode_reg32_p,
565 `x86_lookup_p(`$1',x86_opcode_reg32_list)')
567 define(x86_opcode_reg32_list,
578 dnl Usage: x86_opcode_tttn(cond)
580 dnl Expand to the 4-bit "tttn" field value for the given x86 branch
581 dnl condition (like `c', `ae', etc).
dnl Looks `cond' up in x86_opcode_tttn_list, the key,value table defined
dnl right after this macro.  x86_lookup raises an m4 error when the
dnl condition is not one of the keys.
dnl
dnl The table name must be spelt x86_opcode_tttn_list, matching its
dnl definition; a misspelt name is not a macro, so x86_lookup would receive
dnl only two arguments and reject every condition.
583 define(x86_opcode_tttn,
585 `x86_lookup(`$1',x86_opcode_tttn_list)')
587 define(x86_opcode_tttn_list,
590 `b', 2, `c', 2, `nae',2,
591 `nb', 3, `nc', 3, `ae', 3,
598 `p', 10, `pe', 10, `npo',10,
599 `np', 11, `npe',11, `po', 11,
606 dnl Usage: cmovCC(%srcreg,%dstreg)
608 dnl Emit a cmov instruction, using a .byte sequence, since various past
609 dnl versions of gas don't know cmov. For example,
611 dnl cmovz( %eax, %ebx)
613 dnl The source operand can only be a plain register. (m4 code implementing
614 dnl full memory addressing modes exists, believe it or not, but isn't
615 dnl currently needed and isn't included.)
617 dnl All the standard conditions are defined. Attempting to use one without
618 dnl the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
619 dnl an error. This protects against writing something old gas wouldn't
622 dnl Called: define_cmov_many(cond,tttn,cond,tttn,...)
dnl Recursive: passes the first cond,tttn pair to define_cmov, then calls
dnl itself on the remaining arguments.  Stops, expanding to nothing, when
dnl m4_length of the first argument is 0 (presumably an empty argument).
623 define(define_cmov_many,
624 `ifelse(m4_length(`$1'),0,,
625 `define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
627 dnl Called: define_cmov(cond,tttn)
628 dnl Emit basically define(cmov<cond>,`cmov_internal(<cond>,<tttn>,`$1',`$2')')
632 m4_instruction_wrapper()
634 `cmov_internal'(m4_doublequote($`'0),``$2'',dnl
635 m4_doublequote($`'1),m4_doublequote($`'2)))')
637 define_cmov_many(x86_opcode_tttn_list)
639 dnl Called: cmov_internal(name,tttn,src,dst)
640 define(cmov_internal,
645 eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
649 dnl Usage: x86_opcode_regmmx(reg)
651 dnl Validate the given mmx register, and return its number, 0 to 7.
653 define(x86_opcode_regmmx,
655 `x86_lookup(`$1',x86_opcode_regmmx_list)')
657 define(x86_opcode_regmmx_list,
668 dnl Usage: psadbw(%srcreg,%dstreg)
670 dnl Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
671 dnl FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences. For
674 dnl psadbw( %mm1, %mm2)
676 dnl Only register->register forms are supported here, which suffices for
677 dnl the current code.
680 m4_instruction_wrapper()
683 eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
687 dnl Usage: Zdisp(inst,op,op,op)
689 dnl Generate explicit .byte sequences if necessary to force a byte-sized
690 dnl zero displacement on an instruction. For example,
692 dnl Zdisp( movl, 0,(%esi), %eax)
696 dnl .byte 139,70,0 C movl 0(%esi), %eax
698 dnl If the displacement given isn't 0, then normal assembler code is
699 dnl generated. For example,
701 dnl Zdisp( movl, 4,(%esi), %eax)
705 dnl movl 4(%esi), %eax
707 dnl This means a single Zdisp() form can be used with an expression for the
708 dnl displacement, and .byte will be used only if necessary. The
709 dnl displacement argument is eval()ed.
711 dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is
712 dnl implemented with a table of instructions and encodings. A new entry is
713 dnl needed for any different operation or registers. The table is split
714 dnl into separate macros to avoid overflowing BSD m4 macro expansion space.
718 `define(`Zdisp_found',0)dnl
723 ifelse(Zdisp_found,0,
724 `m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
728 Zdisp_match( adcl, 0,(%edx), %eax, `0x13,0x42,0x00', $@)`'dnl
729 Zdisp_match( adcl, 0,(%edx), %ebx, `0x13,0x5a,0x00', $@)`'dnl
730 Zdisp_match( adcl, 0,(%edx), %esi, `0x13,0x72,0x00', $@)`'dnl
731 Zdisp_match( addl, %ebx, 0,(%edi), `0x01,0x5f,0x00', $@)`'dnl
732 Zdisp_match( addl, %ecx, 0,(%edi), `0x01,0x4f,0x00', $@)`'dnl
733 Zdisp_match( addl, %esi, 0,(%edi), `0x01,0x77,0x00', $@)`'dnl
734 Zdisp_match( sbbl, 0,(%edx), %eax, `0x1b,0x42,0x00', $@)`'dnl
735 Zdisp_match( sbbl, 0,(%edx), %esi, `0x1b,0x72,0x00', $@)`'dnl
736 Zdisp_match( subl, %ecx, 0,(%edi), `0x29,0x4f,0x00', $@)`'dnl
737 Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
738 Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
739 Zdisp_match( adc, 0,(%ebx,%ecx,4), %eax, `0x13,0x44,0x8b,0x00', $@)`'dnl
740 Zdisp_match( sbb, 0,(%ebx,%ecx,4), %eax, `0x1b,0x44,0x8b,0x00', $@)`'dnl
743 Zdisp_match( movl, %eax, 0,(%edi), `0x89,0x47,0x00', $@)`'dnl
744 Zdisp_match( movl, %ebx, 0,(%edi), `0x89,0x5f,0x00', $@)`'dnl
745 Zdisp_match( movl, %esi, 0,(%edi), `0x89,0x77,0x00', $@)`'dnl
746 Zdisp_match( movl, 0,(%ebx), %eax, `0x8b,0x43,0x00', $@)`'dnl
747 Zdisp_match( movl, 0,(%ebx), %esi, `0x8b,0x73,0x00', $@)`'dnl
748 Zdisp_match( movl, 0,(%edx), %eax, `0x8b,0x42,0x00', $@)`'dnl
749 Zdisp_match( movl, 0,(%esi), %eax, `0x8b,0x46,0x00', $@)`'dnl
750 Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
751 Zdisp_match( mov, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
752 Zdisp_match( mov, %eax, 0,(%edi,%ecx,4), `0x89,0x44,0x8f,0x00', $@)`'dnl
755 Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
756 Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
757 Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
758 Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
759 Zdisp_match( movq, 0,(%edx), %mm0, `0x0f,0x6f,0x42,0x00', $@)`'dnl
760 Zdisp_match( movq, 0,(%esi), %mm0, `0x0f,0x6f,0x46,0x00', $@)`'dnl
761 Zdisp_match( movq, %mm0, 0,(%edi), `0x0f,0x7f,0x47,0x00', $@)`'dnl
762 Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
763 Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
764 Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
767 Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
768 Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
769 Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
770 Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
771 Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
772 Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
773 Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
778 `ifelse(eval(m4_stringequal_p(`$1',`$6')
779 && m4_stringequal_p(`$2',0)
780 && m4_stringequal_p(`$3',`$8')
781 && m4_stringequal_p(`$4',`$9')),1,
782 `define(`Zdisp_found',1)dnl
784 ` .byte $5 C `$1 0$3, $4'',
787 `ifelse(eval(m4_stringequal_p(`$1',`$6')
788 && m4_stringequal_p(`$2',`$7')
789 && m4_stringequal_p(`$3',0)
790 && m4_stringequal_p(`$4',`$9')),1,
791 `define(`Zdisp_found',1)dnl
793 ` .byte $5 C `$1 $2, 0$4'',
794 ` $6 $7, $8$9')')')')
797 dnl Usage: shldl(count,src,dst)
798 dnl shrdl(count,src,dst)
799 dnl shldw(count,src,dst)
800 dnl shrdw(count,src,dst)
802 dnl Generate a double-shift instruction, possibly omitting a %cl count
803 dnl parameter if that's what the assembler requires, as indicated by
804 dnl WANT_SHLDL_CL in config.m4. For example,
806 dnl shldl( %cl, %eax, %ebx)
808 dnl turns into either
810 dnl shldl %cl, %eax, %ebx
814 dnl Immediate counts are always passed through unchanged. For example,
816 dnl shrdl( $2, %esi, %edi)
818 dnl shrdl $2, %esi, %edi
821 dnl If you forget to use the macro form "shldl( ...)" and instead write
822 dnl just a plain "shldl ...", an error results. This ensures the necessary
823 dnl variant treatment of %cl isn't accidentally bypassed.
825 define(define_shd_instruction,
828 m4_instruction_wrapper()
830 `shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
831 m4_doublequote($`'2),m4_doublequote($`'3)))')
833 dnl Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
834 define_shd_instruction(shldl)
835 define_shd_instruction(shrdl)
836 define_shd_instruction(shldw)
837 define_shd_instruction(shrdw)
839 dnl Called: shd_instruction(op,count,src,dst)
840 define(shd_instruction,
842 m4_assert_defined(`WANT_SHLDL_CL')
843 `ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
845 ``$1' `$2', `$3', `$4'')')
848 dnl Usage: ASSERT([cond][,instructions])
850 dnl If WANT_ASSERT is 1, output the given instructions and expect the given
851 dnl flags condition to then be satisfied. For example,
853 dnl ASSERT(ne, `cmpl %eax, %ebx')
855 dnl The instructions can be omitted to just assert a flags condition with
856 dnl no extra calculation. For example,
860 dnl When `instructions' is not empty, a pushf/popf is added to preserve the
861 dnl flags, but the instructions themselves must preserve any registers that
862 dnl matter. FRAME is adjusted for the push and pop, so the instructions
863 dnl given can use defframe() stack variables.
865 dnl The condition can be omitted to just output the given instructions when
866 dnl assertion checking is wanted. In this case the pushf/popf is omitted.
869 dnl ASSERT(, `movl %eax, VAR_KEEPVAL')
872 m4_assert_numargs_range(1,2)
873 m4_assert_defined(`WANT_ASSERT')
874 `ifelse(WANT_ASSERT,1,
878 ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')')
880 j`$1' L(ASSERT_ok`'ASSERT_counter)
881 ud2 C assertion failed
882 L(ASSERT_ok`'ASSERT_counter):
883 ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')')
884 define(`ASSERT_counter',incr(ASSERT_counter))')')')
886 define(ASSERT_counter,1)
889 dnl Usage: movl_text_address(label,register)
891 dnl Get the address of a text segment label, using either a plain movl or a
892 dnl position-independent calculation, as necessary. For example,
894 dnl movl_text_address(L(foo),%eax)
896 dnl This macro is only meant for use in ASSERT()s or when testing, since
897 dnl the PIC sequence it generates will want to be done with a ret balancing
898 dnl the call on CPUs with return address branch prediction.
900 dnl The addl generated here has a backward reference to the label, and so
901 dnl won't suffer from the two forwards references bug in old gas (described
902 dnl in mpn/x86/README).
904 define(movl_text_address,
907 `call L(movl_text_address_`'movl_text_address_counter)
908 L(movl_text_address_`'movl_text_address_counter):
910 addl `$'$1-L(movl_text_address_`'movl_text_address_counter), $2
911 define(`movl_text_address_counter',incr(movl_text_address_counter))',
914 define(movl_text_address_counter,1)
917 dnl Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
919 dnl Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
920 dnl appropriate for nails in use or not.
922 define(notl_or_xorl_GMP_NUMB_MASK,
924 `ifelse(GMP_NAIL_BITS,0,
926 `xorl $GMP_NUMB_MASK, `$1'')')
929 dnl Usage: LEA(symbol,reg)
932 define(`EPILOGUE_cpu',
934 L(movl_eip_`'substr($2,1)):
939 call L(movl_eip_`'substr($2,1))
940 addl $_GLOBAL_OFFSET_TABLE_, $2
946 m4_assert_numargs_range(1,2)
948 ALIGN(ifelse($#,1,2,$2))
954 ` SIZE(`$1',.-`$1')')