mpn/x86_64/fat/fat_entry.asm

   1 dnl  x86 fat binary entrypoints.
   2
   3 dnl  Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
   4 dnl  Torbjorn Granlund (port to x86_64)
   5
   6 dnl  Copyright 2003, 2009 Free Software Foundation, Inc.
   7 dnl
   8 dnl  This file is part of the GNU MP Library.
   9 dnl
  10 dnl  The GNU MP Library is free software; you can redistribute it and/or
  11 dnl  modify it under the terms of the GNU Lesser General Public License as
  12 dnl  published by the Free Software Foundation; either version 3 of the
  13 dnl  License, or (at your option) any later version.
  14 dnl
  15 dnl  The GNU MP Library is distributed in the hope that it will be useful,
  16 dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 dnl  Lesser General Public License for more details.
  19 dnl
  20 dnl  You should have received a copy of the GNU Lesser General Public License
  21 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  22
  23 include(`../config.m4')
  24
  25
  26 dnl  Forcibly disable profiling.
  27 dnl
  28 dnl  The entrypoints and inits are small enough not to worry about, the real
  29 dnl  routines arrived at will have any profiling.  Also, the way the code
  30 dnl  here ends with a jump means we won't work properly with the
  31 dnl  "instrument" profiling scheme anyway.
  32
  33 define(`WANT_PROFILING',no)
  34
  35
  36         TEXT
  37
  38
  39 dnl  Usage: FAT_ENTRY(name, offset)
  40 dnl
  41 dnl  Emit a fat binary entrypoint function of the given name.  This is the
  42 dnl  normal entry for applications, eg. __gmpn_add_n.
  43 dnl
  44 dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
  45 dnl  the given "offset" (in bytes).
  46 dnl
  47 dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
  48 dnl  fine for all x86s.
  49 dnl
  50 dnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
  51 dnl  ensure at least the first two instructions don't cross a cache line
  52 dnl  boundary.
  53 dnl
  54 dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
  55 dnl  grepping in configure, stopping that code trying to eval something with
  56 dnl  $1 in it.
  57
  58 define(FAT_ENTRY,
  59 m4_assert_numargs(2)
  60 `       ALIGN(ifdef(`PIC',16,8))
  61 `'PROLOGUE($1)
  62 ifdef(`PIC',
  63 `       LEA(    GSYM_PREFIX`'__gmpn_cpuvec, %rax)
  64         jmp     *$2(%rax)
  65 ',`dnl non-PIC
  66         jmp     *GSYM_PREFIX`'__gmpn_cpuvec+$2
  67 ')
  68 EPILOGUE()
  69 ')
  70
  71
  72 dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
  73 dnl
  74
  75 define(`CPUVEC_offset',0)
  76 foreach(i,
  77 `FAT_ENTRY(MPN(i),CPUVEC_offset)
  78 define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
  79 CPUVEC_FUNCS_LIST)
  80
  81
  82 dnl  Usage: FAT_INIT(name, offset)
  83 dnl
  84 dnl  Emit a fat binary initializer function of the given name.  These
  85 dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
  86 dnl
  87 dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
  88 dnl  the __gmpn_cpuvec pointer, at the given "offset" (in bytes).
  89 dnl  __gmpn_cpuvec_init will have stored the address of the selected
  90 dnl  implementation there.
  91 dnl
  92 dnl  Only one of these routines will be executed, and only once, since after
  93 dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
  94 dnl  need for anything special here, just something small and simple.  To
  95 dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
  96 dnl  with the offset in %al.  %al is used since the movb instruction is 2
  97 dnl  bytes where %eax would be 4.
  98 dnl
  99 dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
 100 dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
 101 dnl  something with $1 in it.
 102 dnl
 103 dnl  We need to preserve parameter registers over the __gmpn_cpuvec_init call
 104
 105 define(FAT_INIT,
 106 m4_assert_numargs(2)
 107 `PROLOGUE($1)
 108         mov     $`'$2, %al
 109         jmp     L(fat_init)
 110 EPILOGUE()
 111 ')
 112
 113 L(fat_init):
 114         C al    __gmpn_cpuvec byte offset
 115
 116         movzbl  %al, %eax
 117         push    %rdi
 118         push    %rsi
 119         push    %rdx
 120         push    %rcx
 121         push    %r8
 122         push    %r9
 123         push    %rax
 124         CALL(   __gmpn_cpuvec_init)
 125         pop     %rax
 126         pop     %r9
 127         pop     %r8
 128         pop     %rcx
 129         pop     %rdx
 130         pop     %rsi
 131         pop     %rdi
 132 ifdef(`PIC',`
 133         LEA(    GSYM_PREFIX`'__gmpn_cpuvec, %r10)
 134         jmp     *(%r10,%rax)
 135 ',`dnl non-PIC
 136         jmp     *GSYM_PREFIX`'__gmpn_cpuvec(%rax)
 137 ')
 138
 139 dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
 140 dnl
 141
 142 define(`CPUVEC_offset',0)
 143 foreach(i,
 144 `FAT_INIT(MPN(i`'_init),CPUVEC_offset)
 145 define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
 146 CPUVEC_FUNCS_LIST)
 147
 148
 149
 150 C long __gmpn_cpuid (char dst[12], int id);
 151 C
 152 C This is called only once, so just something simple and compact is fine.
 153
 154
 155 PROLOGUE(__gmpn_cpuid)
 156         mov     %rbx, %r8
 157         mov     %esi, %eax
 158         cpuid
 159         mov     %ebx, (%rdi)
 160         mov     %edx, 4(%rdi)
 161         mov     %ecx, 8(%rdi)
 162         mov     %r8, %rbx
 163         ret
 164 EPILOGUE()