mpn/x86/fat/fat_entry.asm

   1 dnl  x86 fat binary entrypoints.
   2
   3 dnl  Copyright 2003 Free Software Foundation, Inc.
   4 dnl
   5 dnl  This file is part of the GNU MP Library.
   6 dnl
   7 dnl  The GNU MP Library is free software; you can redistribute it and/or
   8 dnl  modify it under the terms of the GNU Lesser General Public License as
   9 dnl  published by the Free Software Foundation; either version 3 of the
  10 dnl  License, or (at your option) any later version.
  11 dnl
  12 dnl  The GNU MP Library is distributed in the hope that it will be useful,
  13 dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 dnl  Lesser General Public License for more details.
  16 dnl
  17 dnl  You should have received a copy of the GNU Lesser General Public License
  18 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  19
  20 include(`../config.m4')
  21
  22
  23 dnl  Forcibly disable profiling.
  24 dnl
  25 dnl  The entrypoints and inits are small enough not to worry about, the real
  26 dnl  routines arrived at will have any profiling.  Also, the way the code
  27 dnl  here ends with a jump means we won't work properly with the
  28 dnl  "instrument" profiling scheme anyway.
  29
  30 define(`WANT_PROFILING',no)
  31 dnl  From here on WANT_PROFILING is defined as "no" for the rest of this file.
  32
  33         TEXT
  34
  35
  36 dnl  Usage: FAT_ENTRY(name, offset)
  37 dnl
  38 dnl  Emit a fat binary entrypoint function of the given name.  This is the
  39 dnl  normal entry for applications, eg. __gmpn_add_n.
  40 dnl
  41 dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
  42 dnl  the given "offset" (in bytes).
  43 dnl
  44 dnl  For non-PIC, the jumps are 6 bytes each, aligning them to 8 should be
  45 dnl  fine for all x86s.
  46 dnl
  47 dnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
  48 dnl  ensure at least the first two instructions don't cross a cache line
  49 dnl  boundary.
  50 dnl
  51 dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
  52 dnl  grepping in configure, stopping that code trying to eval something with
  53 dnl  $1 in it.
  54
  55 define(FAT_ENTRY,
  56 m4_assert_numargs(2)
  57 `       ALIGN(ifdef(`PIC',16,8))
  58 `'PROLOGUE($1)
  59 ifdef(`PIC',
  60 `       call    L(movl_eip_edx)         C edx = return address = entry_here label
  61 L(entry_here$2):
  62         addl    $_GLOBAL_OFFSET_TABLE_+[.-L(entry_here$2)], %edx   C edx = GOT base
  63         movl    GSYM_PREFIX`'__gmpn_cpuvec@GOT(%edx), %edx   C edx = address of __gmpn_cpuvec
  64         jmp     *m4_empty_if_zero($2)(%edx)   C tail jump through the pointer at the given offset
  65 ',`dnl non-PIC
  66         jmp     *GSYM_PREFIX`'__gmpn_cpuvec+$2   C tail jump through the pointer at the given offset
  67 ')
  68 EPILOGUE()
  69 ')
  70
  71
  72 dnl  Emit one FAT_ENTRY for each name in CPUVEC_FUNCS_LIST.  CPUVEC_offset
  73 dnl  starts at 0 and is advanced by 4 after every entry, so each entrypoint
  74 dnl  is given its own byte offset into __gmpn_cpuvec.
  75 define(`CPUVEC_offset',0)
  76 foreach(i,
  77 `FAT_ENTRY(MPN(i),CPUVEC_offset)
  78 define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
  79 CPUVEC_FUNCS_LIST)
  80
  81 ifdef(`PIC',`
  82         ALIGN(8)
  83 L(movl_eip_edx):
  84         movl    (%esp), %edx            C edx = return address of the call that got us here
  85         ret_internal                    C NOTE review - ret_internal is defined outside this file and presumably emits a return
  86 ')
  87
  88
  89 dnl  Usage: FAT_INIT(name, offset)
  90 dnl
  91 dnl  Emit a fat binary initializer function of the given name.  These
  92 dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
  93 dnl
  94 dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
  95 dnl  the __gmpn_cpuvec pointer, at the given "offset" (in bytes).
  96 dnl  __gmpn_cpuvec_init will have stored the address of the selected
  97 dnl  implementation there.
  98 dnl
  99 dnl  Only one of these routines will be executed, and only once, since after
 100 dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
 101 dnl  need for anything special here, just something small and simple.  To
 102 dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
 103 dnl  with the offset in %al.  %al is used since the movb instruction is 2
 104 dnl  bytes where a movl to %eax would be 5.
 105 dnl
 106 dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
 107 dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
 108 dnl  something with $1 in it.
 109
 110 define(FAT_INIT,
 111 m4_assert_numargs(2)
 112 `PROLOGUE($1)
 113         movb    $`'$2, %al              C al = byte offset of this pointer within __gmpn_cpuvec
 114         jmp     L(fat_init)             C shared code runs the init and then re-dispatches
 115 EPILOGUE()
 116 ')
 117
 118 L(fat_init):
 119         C al    __gmpn_cpuvec byte offset
 120         C The offset is never negative so it is zero extended.  Sign extension would turn an offset of 128 or more into a negative displacement.
 121         movzbl  %al, %eax
 122         pushl   %eax                    C keep the offset across the call below
 123
 124 ifdef(`PIC',`
 125         pushl   %ebx                    C preserve ebx for our caller since it is used as GOT pointer here
 126         call    L(movl_eip_ebx)         C ebx = return address = init_here label
 127 L(init_here):
 128         addl    $_GLOBAL_OFFSET_TABLE_+[.-L(init_here)], %ebx   C ebx = GOT base
 129         call    GSYM_PREFIX`'__gmpn_cpuvec_init@PLT   C stores the selected routines into __gmpn_cpuvec
 130         movl    GSYM_PREFIX`'__gmpn_cpuvec@GOT(%ebx), %edx   C edx = address of __gmpn_cpuvec
 131         popl    %ebx
 132         popl    %eax                    C eax = byte offset saved above
 133         jmp     *(%edx,%eax)            C tail jump through the updated pointer
 134
 135 L(movl_eip_ebx):
 136         movl    (%esp), %ebx            C ebx = return address of the call that got us here
 137         ret_internal
 138
 139 ',`dnl non-PIC
 140         call    GSYM_PREFIX`'__gmpn_cpuvec_init   C stores the selected routines into __gmpn_cpuvec
 141         popl    %eax                    C eax = byte offset saved above
 142         jmp     *GSYM_PREFIX`'__gmpn_cpuvec(%eax)   C tail jump through the updated pointer
 143 ')
 144
 145 dnl  Emit one FAT_INIT for each name in CPUVEC_FUNCS_LIST, named with an
 146 dnl  _init suffix.  CPUVEC_offset is reset to 0 and advanced by 4 after
 147 dnl  every entry, giving each initializer its byte offset into __gmpn_cpuvec.
 148 define(`CPUVEC_offset',0)
 149 foreach(i,
 150 `FAT_INIT(MPN(i`'_init),CPUVEC_offset)
 151 define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
 152 CPUVEC_FUNCS_LIST)
 153
 154
 155
 156 C long __gmpn_cpuid (char dst[12], int id);
 157 C
 158 C This is called only once, so just something simple and compact is fine.
 159
 160 defframe(PARAM_ID,  8)
 161 defframe(PARAM_DST, 4)
 162 deflit(`FRAME',0)
 163         C PARAM_DST and PARAM_ID name the two stack arguments, declared at offsets 4 and 8.
 164 PROLOGUE(__gmpn_cpuid)
 165         pushl   %esi            FRAME_pushl()   C esi is used below as the dst pointer
 166         pushl   %ebx            FRAME_pushl()   C ebx is overwritten by cpuid
 167         movl    PARAM_ID, %eax                  C eax = id, the cpuid request number
 168         cpuid                                   C results arrive in eax, ebx, ecx and edx
 169         movl    PARAM_DST, %esi                 C esi = dst
 170         movl    %ebx, (%esi)                    C dst bytes 0-3  = ebx
 171         movl    %edx, 4(%esi)                   C dst bytes 4-7  = edx
 172         movl    %ecx, 8(%esi)                   C dst bytes 8-11 = ecx
 173         popl    %ebx
 174         popl    %esi
 175         ret                                     C eax is untouched since cpuid and is the return value
 176 EPILOGUE()
 177
 178
 179 C int __gmpn_cpuid_available (void);
 180 C
 181 C Return non-zero if the cpuid instruction is available, which means late
 182 C model 80486 and higher.  80386 and early 80486 don't have cpuid.
 183 C
 184 C The test follows Intel AP-485 application note, namely that if bit 21 is
 185 C modifiable then cpuid is supported.  This test is reentrant and thread
 186 C safe, since of course any interrupt or context switch will preserve the
 187 C flags while we're tinkering with them.
 188 C
 189 C This is called only once, so just something simple and compact is fine.
 190
 191 PROLOGUE(__gmpn_cpuid_available)
 192         pushf
 193         popl    %ecx            C old flags
 194         C Build a copy of the flags with bit 21 flipped and try to load it.
 195         movl    %ecx, %edx
 196         xorl    $0x200000, %edx C 0x200000 is bit 21
 197         pushl   %edx
 198         popf                    C attempt to install the flipped value
 199         pushf
 200         popl    %edx            C tweaked flags
 201         C Return 1 if the flags read back differ from the old flags, else 0.
 202         movl    $1, %eax
 203         cmpl    %ecx, %edx
 204         jne     L(available)    C flags changed, so cpuid is available
 205         xorl    %eax, %eax      C not changed, so cpuid not available
 206         C Note the old flags value in ecx is not loaded back before returning.
 207 L(available):
 208         ret
 209 EPILOGUE()