mpi/i586/mpih-rshift.S

   1 /* i80586   rshift
   2  *
   3  *      Copyright (C) 1992, 1994, 1998,
   4  *                    2001 Free Software Foundation, Inc.
   5  *
   6  * This file is part of GnuPG.
   7  *
   8  * GnuPG is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * GnuPG is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
  21  * USA.
  22  *
  23  * Note: This code is heavily based on the GNU MP Library.
  24  *       Actually it's the same code with only minor changes in the
  25  *       way the data is stored; this is to support the abstraction
  26  *       of an optional secure memory allocation which may be used
  27  *       to avoid revealing of sensitive data due to paging etc.
  28  *       The GNU MP Library itself is published under the LGPL;
  29  *       however I decided to publish this code under the plain GPL.
  30  */
  31
  32
  33 #include "sysdep.h"
  34 #include "asm-syntax.h"
  35
  36
  37
   38 /*******************
   39  * mpi_limb_t            (%eax: the bits shifted out of up[0], in the top bits)
   40  * mpihelp_rshift( mpi_ptr_t wp,        (sp + 4)   destination limb array
   41  *                 mpi_ptr_t up,        (sp + 8)   source limb array
   42  *                 mpi_size_t usize,    (sp + 12)  limb count; up[0] is always read
   43  *                 unsigned cnt)        (sp + 16)  shift count in bits
   44  * Stores the usize 32-bit limbs at up, shifted right by cnt bits, at wp.  */
   45 /* Stack offsets above are at entry; %edi, %esi, %ebx and %ebp are saved/restored. */
   46 .text
   47         ALIGN (3)
   48         .globl C_SYMBOL_NAME(mpihelp_rshift)
   49 C_SYMBOL_NAME(mpihelp_rshift:)
   50         pushl   %edi
   51         pushl   %esi
   52         pushl   %ebx
   53         pushl   %ebp                    /* 4 pushes: args are now at 20..32(%esp) */
   54
   55         movl    20(%esp),%edi           /* res_ptr (wp) */
   56         movl    24(%esp),%esi           /* s_ptr (up) */
   57         movl    28(%esp),%ebp           /* size (usize), in limbs */
   58         movl    32(%esp),%ecx           /* cnt; %cl is the shift count below */
   59
   60 /* We can use faster code for shift-by-1 under certain conditions.  */
   61         cmp     $1,%ecx
   62         jne     Rnormal                 /* cnt != 1: always the general path */
   63         leal    4(%edi),%eax
   64         cmpl    %esi,%eax
   65         jnc     Rspecial                /* jump if res_ptr + 1 >= s_ptr */
   66         leal    (%edi,%ebp,4),%eax      /* %eax = res_ptr + size (limbs) */
   67         cmpl    %eax,%esi
   68         jnc     Rspecial                /* jump if s_ptr >= res_ptr + size */
   69 /* General path: ascending addresses; each result limb is a shrdl of two adjacent source limbs. */
   70 Rnormal:
   71         movl    (%esi),%edx             /* %edx = up[0] */
   72         addl    $4,%esi
   73         xorl    %eax,%eax
   74         shrdl   %cl,%edx,%eax           /* carry limb: low cnt bits of up[0], at the top of %eax */
   75         pushl   %eax                    /* push carry limb onto stack */
   76
   77         decl    %ebp                    /* size - 1 result limbs are built from limb pairs */
   78         pushl   %ebp                    /* keep size - 1 for the remainder loop */
   79         shrl    $3,%ebp                 /* %ebp = number of full 8-limb passes */
   80         jz      Rend
   81
   82         movl    (%edi),%eax             /* fetch destination cache line (value unused) */
   83 /* Unrolled 8 limbs per pass; %eax/%ebx/%edx rotate as the limb still awaiting its upper bits. */
   84         ALIGN   (2)
   85 Roop:   movl    28(%edi),%eax           /* fetch destination cache line (value unused) */
   86         movl    %edx,%ebx               /* %ebx = limb carried in from the previous step */
   87
   88         movl    (%esi),%eax
   89         movl    4(%esi),%edx
   90         shrdl   %cl,%eax,%ebx
   91         shrdl   %cl,%edx,%eax
   92         movl    %ebx,(%edi)
   93         movl    %eax,4(%edi)
   94
   95         movl    8(%esi),%ebx
   96         movl    12(%esi),%eax
   97         shrdl   %cl,%ebx,%edx
   98         shrdl   %cl,%eax,%ebx
   99         movl    %edx,8(%edi)
  100         movl    %ebx,12(%edi)
  101
  102         movl    16(%esi),%edx
  103         movl    20(%esi),%ebx
  104         shrdl   %cl,%edx,%eax
  105         shrdl   %cl,%ebx,%edx
  106         movl    %eax,16(%edi)
  107         movl    %edx,20(%edi)
  108
  109         movl    24(%esi),%eax
  110         movl    28(%esi),%edx
  111         shrdl   %cl,%eax,%ebx
  112         shrdl   %cl,%edx,%eax
  113         movl    %ebx,24(%edi)
  114         movl    %eax,28(%edi)
  115
  116         addl    $32,%esi                /* advance source by 8 limbs */
  117         addl    $32,%edi                /* advance destination by 8 limbs */
  118         decl    %ebp
  119         jnz     Roop
  120
  121 Rend:   popl    %ebp                    /* %ebp = size - 1 */
  122         andl    $7,%ebp                 /* limbs left over after the 8-limb passes */
  123         jz      Rend2
  124 Roop2:  movl    (%esi),%eax
  125         shrdl   %cl,%eax,%edx           /* compute result limb */
  126         movl    %edx,(%edi)
  127         movl    %eax,%edx               /* the limb just loaded is the next one to finish */
  128         addl    $4,%esi
  129         addl    $4,%edi
  130         decl    %ebp
  131         jnz     Roop2
  132
  133 Rend2:  shrl    %cl,%edx                /* compute most significant limb */
  134         movl    %edx,(%edi)             /* store it */
  135
  136         popl    %eax                    /* pop carry limb: this is the return value */
  137
  138         popl    %ebp
  139         popl    %ebx
  140         popl    %esi
  141         popl    %edi
  142         ret
  143
  144 /* Shift-by-1 path.  We loop from the most significant end of the arrays,
  145    which is only done when res_ptr + 1 >= s_ptr or s_ptr >= res_ptr + size
  146    (the checks at entry); each limb's low bit reaches the next via the carry.
  147 */
  148
  149 Rspecial:
  150         leal    -4(%edi,%ebp,4),%edi    /* %edi = &wp[size - 1] */
  151         leal    -4(%esi,%ebp,4),%esi    /* %esi = &up[size - 1] */
  152
  153         movl    (%esi),%edx             /* %edx = most significant source limb */
  154         subl    $4,%esi
  155
  156         decl    %ebp
  157         pushl   %ebp                    /* keep size - 1 for the remainder loop */
  158         shrl    $3,%ebp                 /* %ebp = number of full 8-limb passes */
  159
  160         shrl    $1,%edx                 /* top limb >> 1; its bit 0 is now in CF */
  161         incl    %ebp                    /* incl + decl: set ZF from %ebp ... */
  162         decl    %ebp                    /* ... while leaving CF untouched */
  163         jz      RLend
  164
  165         movl    (%edi),%eax             /* fetch destination cache line (value unused) */
  166 /* Unrolled 8 limbs per pass, descending; each rcrl $1 hands one bit down through CF. */
  167         ALIGN   (2)
  168 RLoop:  movl    -28(%edi),%eax          /* fetch destination cache line (value unused) */
  169         movl    %edx,%ebx               /* %ebx = finished limb from the previous step */
  170
  171         movl    (%esi),%eax
  172         movl    -4(%esi),%edx
  173         rcrl    $1,%eax                 /* CF -> bit 31, bit 0 -> CF */
  174         movl    %ebx,(%edi)
  175         rcrl    $1,%edx
  176         movl    %eax,-4(%edi)
  177
  178         movl    -8(%esi),%ebx
  179         movl    -12(%esi),%eax
  180         rcrl    $1,%ebx
  181         movl    %edx,-8(%edi)
  182         rcrl    $1,%eax
  183         movl    %ebx,-12(%edi)
  184
  185         movl    -16(%esi),%edx
  186         movl    -20(%esi),%ebx
  187         rcrl    $1,%edx
  188         movl    %eax,-16(%edi)
  189         rcrl    $1,%ebx
  190         movl    %edx,-20(%edi)
  191
  192         movl    -24(%esi),%eax
  193         movl    -28(%esi),%edx
  194         rcrl    $1,%eax
  195         movl    %ebx,-24(%edi)
  196         rcrl    $1,%edx
  197         movl    %eax,-28(%edi)
  198
  199         leal    -32(%esi),%esi          /* use leal not to clobber carry */
  200         leal    -32(%edi),%edi
  201         decl    %ebp                    /* decl changes ZF but not CF */
  202         jnz     RLoop
  203
  204 RLend:  popl    %ebp                    /* %ebp = size - 1 */
  205         sbbl    %eax,%eax               /* save carry in %eax (0 or -1) */
  206         andl    $7,%ebp                 /* leftover limbs; andl clobbers CF, hence the save */
  207         jz      RLend2
  208         addl    %eax,%eax               /* restore carry from eax */
  209 RLoop2: movl    %edx,%ebx
  210         movl    (%esi),%edx
  211         rcrl    $1,%edx
  212         movl    %ebx,(%edi)
  213
  214         leal    -4(%esi),%esi           /* use leal not to clobber carry */
  215         leal    -4(%edi),%edi
  216         decl    %ebp
  217         jnz     RLoop2
  218
  219         jmp     RL1                     /* CF still holds the bit from the last rcrl */
  220 RLend2: addl    %eax,%eax               /* restore carry from eax */
  221 RL1:    movl    %edx,(%edi)             /* store last (least significant) limb */
  222
  223         movl    $0,%eax                 /* movl rather than xorl: CF must survive */
  224         rcrl    $1,%eax                 /* return value: shifted-out bit in bit 31 */
  225
  226         popl    %ebp
  227         popl    %ebx
  228         popl    %esi
  229         popl    %edi
  230         ret
 231