add support for 32-bit ARM on Windows (#477)
authorPaul Monson <paulmon@users.noreply.github.com>
Fri, 26 Apr 2019 11:58:58 +0000 (04:58 -0700)
committerAnthony Green <green@moxielogic.com>
Fri, 26 Apr 2019 11:58:58 +0000 (07:58 -0400)
* add support for 32-bit ARM on Windows

* fix mismatched brace in appveyor.yml

* remove arm platform from appveyor.yml for now

* fix arm build

* fix typo

* fix assembler names

* try Visual Studio 2017

* add windows arm32 to .appveyor.yml

* update README.md

.appveyor.yml
Makefile.am
README.md
configure.host
msvcc.sh
src/arm/ffi.c
src/arm/ffitarget.h
src/arm/sysv_msvc_arm32.S [new file with mode: 0644]

index 5a4f0960388173fa9a34d7b34ab03320ac8866c6..3c021a8c15bbfe4226f5ea4cfb18795c927710fd 100644 (file)
@@ -8,9 +8,11 @@ shallow_clone: true
 #  32- and 64-bit clang/mingw
 #  and perhaps more.
 
-image: Visual Studio 2013
+image: Visual Studio 2017
 platform:
   - x64
+  - x86
+  - arm
 
 environment:
   global:
@@ -18,30 +20,42 @@ environment:
     CYG_CACHE: C:/cygwin/var/cache/setup
     CYG_MIRROR: http://mirrors.kernel.org/sourceware/cygwin/
   matrix:
-    - VSVER: 12
+    - VSVER: 15
 
 install:
   - ps: >-
       If ($env:Platform -Match "x86") {
           $env:VCVARS_PLATFORM="x86"
-          $env:BUILD="x86-pc-cygwin"
-          $env:HOST="x86-pc-windows"
+          $env:BUILD="i686-pc-cygwin"
+          $env:HOST="i686-pc-cygwin"
+          $env:MSVCC="/cygdrive/c/projects/libffi/msvcc.sh"
+          $env:SRC_ARCHITECTURE="x86"
+        } ElseIf ($env:Platform -Match "arm") {
+          $env:VCVARS_PLATFORM="x86_arm"
+          $env:BUILD="i686-pc-cygwin"
+          $env:HOST="arm-w32-cygwin"
+          $env:MSVCC="/cygdrive/c/projects/libffi/msvcc.sh -marm"
+          $env:SRC_ARCHITECTURE="arm"
         } Else {
           $env:VCVARS_PLATFORM="amd64"
           $env:BUILD="x86_64-w64-cygwin"
           $env:HOST="x86_64-w64-cygwin"
+          $env:ASSEMBLER="/cygdrive/c/projects/libffi/msvcc.sh -m64"
+          $env:SRC_ARCHITECTURE="x86"
       }
   - 'appveyor DownloadFile https://cygwin.com/setup-x86.exe -FileName setup.exe'
   - 'setup.exe -qnNdO -R "%CYG_ROOT%" -s "%CYG_MIRROR%" -l "%CYG_CACHE%" -P dejagnu >NUL'
   - '%CYG_ROOT%/bin/bash -lc "cygcheck -dc cygwin"'
+  - echo call VsDevCmd to set VS150COMNTOOLS
+  - call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsDevCmd.bat"
   - ps: $env:VSCOMNTOOLS=(Get-Content ("env:VS" + "$env:VSVER" + "0COMNTOOLS"))
   - echo "Using Visual Studio %VSVER%.0 at %VSCOMNTOOLS%"
-  - call "%VSCOMNTOOLS%\..\..\VC\vcvarsall.bat" %VCVARS_PLATFORM%
+  - call "%VSCOMNTOOLS%..\..\vc\Auxiliary\Build\vcvarsall.bat" %VCVARS_PLATFORM%
 
 build_script:
   - c:\cygwin\bin\sh -lc "(cd $OLDPWD; ./autogen.sh;)"
-  - c:\cygwin\bin\sh -lc "(cd $OLDPWD; ./configure CC='/cygdrive/c/projects/libffi/msvcc.sh -m64' CXX='/cygdrive/c/projects/libffi/msvcc.sh -m64' LD='link' CPP='cl -nologo -EP' CXXCPP='cl -nologo -EP' CPPFLAGS='-DFFI_BUILDING_DLL' AR='/cygdrive/c/projects/libffi/.travis/ar-lib lib' NM='dumpbin -symbols' STRIP=':' --build=$BUILD --host=$HOST;)"
-  - c:\cygwin\bin\sh -lc "(cd $OLDPWD; cp src/x86/ffitarget.h include; make; find .;)"
+  - c:\cygwin\bin\sh -lc "(cd $OLDPWD; ./configure CC='%MSVCC%' CXX='%MSVCC%' LD='link' CPP='cl -nologo -EP' CXXCPP='cl -nologo -EP' CPPFLAGS='-DFFI_BUILDING_DLL' AR='/cygdrive/c/projects/libffi/.travis/ar-lib lib' NM='dumpbin -symbols' STRIP=':' --build=$BUILD --host=$HOST;)"
+  - c:\cygwin\bin\sh -lc "(cd $OLDPWD; cp src/%SRC_ARCHITECTURE%/ffitarget.h include; make; find .;)"
   - c:\cygwin\bin\sh -lc "(cd $OLDPWD; cp `find . -name 'libffi-?.dll'` $HOST/testsuite/; make check; cat `find ./ -name libffi.log`)"
 
 # FIXME: "make check" currently fails.  It just looks like msvcc needs
index 27a11b26e1d9140d87c58931326b8cd18503f437..b74b73bbb1984be39ec32bb8153af460a3c8fd5f 100644 (file)
@@ -82,6 +82,7 @@ EXTRA_libffi_la_SOURCES = \
        src/alpha/ffi.c src/alpha/osf.S                                 \
        src/arc/ffi.c src/arc/arcompact.S                               \
        src/arm/ffi.c src/arm/sysv.S                                    \
+       src/arm/ffi.c src/arm/sysv_msvc_arm32.S                 \
        src/avr32/ffi.c src/avr32/sysv.S                                \
        src/bfin/ffi.c src/bfin/sysv.S                                  \
        src/cris/ffi.c src/cris/sysv.S                                  \
index 3caf2fa1a4464adbd88ddfb603102245034e3a53..17984e5c7129dda25e2ff8537878aae92e3df544 100644 (file)
--- a/README.md
+++ b/README.md
@@ -56,6 +56,7 @@ tested:
 | ARC             | Linux            | GCC                     |
 | ARM             | Linux            | GCC                     |
 | ARM             | iOS              | GCC                     |
+| ARM             | Windows          | MSVC                    |
 | AVR32           | Linux            | GCC                     |
 | Blackfin        | uClinux          | GCC                     |
 | HPPA            | HPUX             | GCC                     |
@@ -196,7 +197,8 @@ See the git log for details at http://github.com/libffi/libffi.
         Default to Microsoft's 64 bit long double ABI with Visual C++.
           GNU compiler uses 80 bits (128 in memory) FFI_GNUW64 ABI.
        Many new tests cases and bug fixes.
-    
+        Add Windows 32-bit arm support.
+        
     3.2.1 Nov-12-14
         Build fix for non-iOS AArch64 targets.
     
index 64addf251902866cb33034a2af1d4e3809f28f9c..8181a61184598a2eba7bcd7901931ee56b8c38ff 100644 (file)
@@ -23,6 +23,11 @@ case "${host}" in
        SOURCES="ffi.c arcompact.S"
        ;;
 
+  arm*-*-cygwin* | arm*-*-mingw* | arm*-*-win* )
+       TARGET=ARM_WIN32; TARGETDIR=arm
+       MSVC=1
+       ;;
+
   arm*-*-*)
        TARGET=ARM; TARGETDIR=arm
        SOURCES="ffi.c sysv.S"
@@ -242,6 +247,9 @@ esac
 
 # ... but some of the cases above share configury.
 case "${TARGET}" in
+  ARM_WIN32)
+       SOURCES="ffi.c sysv_msvc_arm32.S"
+       ;;
   MIPS)
        SOURCES="ffi.c o32.S n32.S"
        ;;
index 9a252f8ffcb4cd80085a461cf217c37d799dfde6..9c52f527c1ea4adebd88db9f90bcd9917376b896 100755 (executable)
--- a/msvcc.sh
+++ b/msvcc.sh
@@ -60,7 +60,7 @@ do
   case $1
   in
     --verbose)
-      $verbose=1
+      verbose=1
       shift 1
     ;;
     --version)
@@ -80,6 +80,11 @@ do
       safeseh=
       shift 1
     ;;
+    -marm)
+      ml='armasm'
+      safeseh=
+      shift 1
+    ;;
     -clang-cl)
       cl="clang-cl"
       shift 1
@@ -237,6 +242,7 @@ do
       else
         output="-Fe$2"
       fi
+      armasm_output="-o $2"
       if [ -n "$assembly" ]; then
         args="$args $output"
       else
@@ -289,13 +295,21 @@ if [ -n "$assembly" ]; then
     fi
     ppsrc="$outdir/$(basename $src|sed 's/.S$/.asm/g')"
 
+    if [ $ml = "armasm" ]; then
+      defines="$defines -D_M_ARM"
+    fi
+
     if test -n "$verbose"; then
       echo "$cl -nologo -EP $includes $defines $src > $ppsrc"
     fi
 
     "$cl" -nologo -EP $includes $defines $src > $ppsrc || exit $?
     output="$(echo $output | sed 's%/F[dpa][^ ]*%%g')"
-    args="-nologo $safeseh $single $output $ppsrc"
+    if [ $ml = "armasm" ]; then
+      args="-nologo -g -oldit $armasm_output $ppsrc -errorReport:prompt"
+    else
+      args="-nologo $safeseh $single $output $ppsrc"
+    fi
 
     if test -n "$verbose"; then
       echo "$ml $args"
index 66a67bdd78558a0dac2a0660a1231acec598e4c6..4e270718a3941560a796cf4446df4377c8fabfeb 100644 (file)
@@ -28,7 +28,7 @@
    DEALINGS IN THE SOFTWARE.
    ----------------------------------------------------------------------- */
 
-#ifdef __arm__
+#if defined(__arm__) || defined(_M_ARM)
 #include <fficonfig.h>
 #include <ffi.h>
 #include <ffi_common.h>
 #include <stdlib.h>
 #include "internal.h"
 
+#if defined(_MSC_VER) && defined(_M_ARM)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
 #if FFI_EXEC_TRAMPOLINE_TABLE
 
 #ifdef __MACH__
 #endif
 
 #else
+#ifndef _M_ARM
 extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN;
+#else
+extern unsigned int ffi_arm_trampoline[3] FFI_HIDDEN;
+#endif
 #endif
 
 /* Forward declares. */
@@ -89,10 +98,20 @@ ffi_put_arg (ffi_type *ty, void *src, void *dst)
     case FFI_TYPE_SINT32:
     case FFI_TYPE_UINT32:
     case FFI_TYPE_POINTER:
+#ifndef _MSC_VER
     case FFI_TYPE_FLOAT:
+#endif
       *(UINT32 *)dst = *(UINT32 *)src;
       break;
 
+#ifdef _MSC_VER
+    // casting a float* to a UINT32* doesn't work on Windows
+    case FFI_TYPE_FLOAT:
+        *(uintptr_t *)dst = 0;
+        *(float *)dst = *(float *)src;
+        break;
+#endif
+
     case FFI_TYPE_SINT64:
     case FFI_TYPE_UINT64:
     case FFI_TYPE_DOUBLE:
@@ -572,15 +591,28 @@ ffi_prep_closure_loc (ffi_closure * closure,
   config[0] = closure;
   config[1] = closure_func;
 #else
-  memcpy (closure->tramp, ffi_arm_trampoline, 8);
+
+#ifndef _M_ARM
+  memcpy(closure->tramp, ffi_arm_trampoline, 8);
+#else
+  // cast away function type so MSVC doesn't set the lower bit of the function pointer
+  memcpy(closure->tramp, (void*)((uintptr_t)ffi_arm_trampoline & 0xFFFFFFFE), FFI_TRAMPOLINE_CLOSURE_OFFSET);
+#endif
+
 #if defined (__QNX__)
   msync(closure->tramp, 8, 0x1000000); /* clear data map */
   msync(codeloc, 8, 0x1000000);        /* clear insn map */
+#elif defined(_MSC_VER)
+  FlushInstructionCache(GetCurrentProcess(), closure->tramp, FFI_TRAMPOLINE_SIZE);
 #else
   __clear_cache(closure->tramp, closure->tramp + 8);   /* clear data map */
   __clear_cache(codeloc, codeloc + 8);                 /* clear insn map */
 #endif
+#ifdef _M_ARM
+  *(void(**)(void))(closure->tramp + FFI_TRAMPOLINE_CLOSURE_FUNCTION) = closure_func;
+#else
   *(void (**)(void))(closure->tramp + 8) = closure_func;
+#endif
 #endif
 
   closure->cif = cif;
@@ -782,7 +814,7 @@ place_vfp_arg (ffi_cif *cif, int h)
        }
       /* Found regs to allocate. */
       cif->vfp_used |= new_used;
-      cif->vfp_args[cif->vfp_nargs++] = reg;
+      cif->vfp_args[cif->vfp_nargs++] = (signed char)reg;
 
       /* Update vfp_reg_free. */
       if (cif->vfp_used & (1 << cif->vfp_reg_free))
@@ -804,7 +836,7 @@ place_vfp_arg (ffi_cif *cif, int h)
 static void
 layout_vfp_args (ffi_cif * cif)
 {
-  int i;
+  unsigned int i;
   /* Init VFP fields */
   cif->vfp_used = 0;
   cif->vfp_nargs = 0;
@@ -819,4 +851,4 @@ layout_vfp_args (ffi_cif * cif)
     }
 }
 
-#endif /* __arm__ */
+#endif /* __arm__ or _M_ARM */
index 1cf10364072685e0f16e1176c782578ab70c8e94..cb57b84880ef78fa69898e50b7bb9a6afbaa9507 100644 (file)
@@ -43,7 +43,7 @@ typedef enum ffi_abi {
   FFI_SYSV,
   FFI_VFP,
   FFI_LAST_ABI,
-#ifdef __ARM_PCS_VFP
+#if defined(__ARM_PCS_VFP) || defined(_M_ARM)
   FFI_DEFAULT_ABI = FFI_VFP,
 #else
   FFI_DEFAULT_ABI = FFI_SYSV,
@@ -57,7 +57,9 @@ typedef enum ffi_abi {
   signed char vfp_args[16]                     \
 
 #define FFI_TARGET_SPECIFIC_VARIADIC
+#ifndef _M_ARM
 #define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
 
 /* ---- Definitions for closures ----------------------------------------- */
 
@@ -74,8 +76,13 @@ typedef enum ffi_abi {
 #error "No trampoline table implementation"
 #endif
 
+#else
+#ifdef _MSC_VER
+#define FFI_TRAMPOLINE_SIZE 16
+#define FFI_TRAMPOLINE_CLOSURE_FUNCTION 12
 #else
 #define FFI_TRAMPOLINE_SIZE 12
+#endif
 #define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE
 #endif
 
diff --git a/src/arm/sysv_msvc_arm32.S b/src/arm/sysv_msvc_arm32.S
new file mode 100644 (file)
index 0000000..5c99d02
--- /dev/null
@@ -0,0 +1,311 @@
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 1998, 2008, 2011 Red Hat, Inc.
+        Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+        Copyright (c) 2019 Microsoft Corporation.
+
+   ARM Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+#include "ksarm.h"
+
+
+        ; 8 byte aligned AREA to support 8 byte aligned jump tables
+        MACRO
+        NESTED_ENTRY_FFI $FuncName, $AreaName, $ExceptHandler
+
+        ; compute the function's labels
+        __DeriveFunctionLabels $FuncName
+
+        ; determine the area we will put the function into
+__FuncArea   SETS    "|.text|"
+        IF "$AreaName" != ""
+__FuncArea   SETS    "$AreaName"
+        ENDIF
+
+        ; set up the exception handler itself
+__FuncExceptionHandler SETS ""
+        IF "$ExceptHandler" != ""
+__FuncExceptionHandler SETS    "|$ExceptHandler|"
+        ENDIF
+
+        ; switch to the specified area, jump tables require 8 byte alignment
+        AREA    $__FuncArea,CODE,CODEALIGN,ALIGN=3,READONLY
+
+        ; export the function name
+        __ExportProc $FuncName
+
+        ; flush any pending literal pool stuff
+        ROUT
+
+        ; reset the state of the unwind code tracking
+        __ResetUnwindState
+
+        MEND
+
+;        MACRO
+;        TABLE_ENTRY $Type, $Table
+;$Type_$Table
+;        MEND
+
+#define E(index,table) return_##index##_##table
+
+    ; r0:   stack
+    ; r1:   frame
+    ; r2:   fn
+    ; r3:   vfp_used
+
+    ; fake entry point exists only to generate exists only to 
+    ; generate .pdata for exception unwinding
+    NESTED_ENTRY_FFI ffi_call_VFP_fake
+    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind
+
+    ALTERNATE_ENTRY ffi_call_VFP
+    cmp    r3, #3                   ; load only d0 if possible
+    vldrle d0, [r0]
+    vldmgt r0, {d0-d7}
+    add    r0, r0, #64              ; discard the vfp register args
+    b ffi_call_SYSV
+    NESTED_END ffi_call_VFP_fake
+
+    ; fake entry point exists only to generate exists only to 
+    ; generate .pdata for exception unwinding
+    NESTED_ENTRY_FFI ffi_call_SYSV_fake
+    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind
+
+    ALTERNATE_ENTRY ffi_call_SYSV
+    stm    r1, {fp, lr}
+    mov    fp, r1
+
+    mov    sp, r0                   ; install the stack pointer
+    mov    lr, r2                   ; move the fn pointer out of the way
+    ldr    ip, [fp, #16]            ; install the static chain
+    ldmia  sp!, {r0-r3}             ; move first 4 parameters in registers.
+    blx    lr                       ; call fn
+
+    ; Load r2 with the pointer to storage for the return value
+    ; Load r3 with the return type code
+    ldr    r2, [fp, #8]
+    ldr    r3, [fp, #12]
+
+    ; Deallocate the stack with the arguments.
+    mov    sp, fp
+
+    ; Store values stored in registers.
+    ALIGN 8
+    lsl     r3, #3
+    add     r3, r3, pc
+    add     r3, #8
+    mov     pc, r3
+
+
+E(ARM_TYPE_VFP_S, ffi_call)
+    ALIGN 8
+    vstr s0, [r2]
+    pop    {fp,pc}
+E(ARM_TYPE_VFP_D, ffi_call)
+    ALIGN 8
+    vstr d0, [r2]
+    pop    {fp,pc}
+E(ARM_TYPE_VFP_N, ffi_call)
+    ALIGN 8
+    vstm r2, {d0-d3}
+    pop    {fp,pc}
+E(ARM_TYPE_INT64, ffi_call)
+    ALIGN 8
+    str    r1, [r2, #4]
+    nop
+E(ARM_TYPE_INT, ffi_call)
+    ALIGN 8
+    str    r0, [r2]
+    pop    {fp,pc}
+E(ARM_TYPE_VOID, ffi_call)
+    ALIGN 8
+    pop    {fp,pc}
+    nop
+E(ARM_TYPE_STRUCT, ffi_call)
+    ALIGN 8
+    cmp r3, #ARM_TYPE_STRUCT
+    pop    {fp,pc}
+    NESTED_END ffi_call_SYSV_fake
+
+    IMPORT |ffi_closure_inner_SYSV|
+    /*
+    int ffi_closure_inner_SYSV
+    (
+        cif,        ; r0
+        fun,        ; r1
+        user_data,  ; r2
+        frame       ; r3
+    )
+    */
+
+    NESTED_ENTRY_FFI ffi_go_closure_SYSV
+    stmdb   sp!, {r0-r3}            ; save argument regs
+    ldr     r0, [ip, #4]            ; load cif
+    ldr     r1, [ip, #8]            ; load fun
+    mov     r2, ip                  ; load user_data
+    b       ffi_go_closure_SYSV_0
+    NESTED_END ffi_go_closure_SYSV
+
+    ; r3:    ffi_closure
+
+    ; fake entry point exists only to generate exists only to 
+    ; generate .pdata for exception unwinding
+    NESTED_ENTRY_FFI ffi_closure_SYSV_fake  
+    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind
+    ALTERNATE_ENTRY ffi_closure_SYSV
+    ldmfd   sp!, {ip,r0}            ; restore fp (r0 is used for stack alignment)
+    stmdb   sp!, {r0-r3}            ; save argument regs
+
+    ldr     r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]    ; ffi_closure->cif
+    ldr     r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  ; ffi_closure->fun
+    ldr     r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  ; ffi_closure->user_data
+
+    ALTERNATE_ENTRY ffi_go_closure_SYSV_0
+    add     ip, sp, #16             ; compute entry sp
+
+    sub     sp, sp, #64+32          ; allocate frame parameter (sizeof(vfp_space) = 64, sizeof(result) = 32)
+    mov     r3, sp                  ; set frame parameter
+    stmdb   sp!, {ip,lr}
+
+    bl      ffi_closure_inner_SYSV  ; call the Python closure
+
+                                    ; Load values returned in registers.
+    add     r2, sp, #64+8           ; address of closure_frame->result
+    bl      ffi_closure_ret         ; move result to correct register or memory for type
+
+    ldmfd   sp!, {ip,lr}
+    mov     sp, ip                  ; restore stack pointer
+    mov     pc, lr
+    NESTED_END ffi_closure_SYSV_fake
+
+    IMPORT |ffi_closure_inner_VFP|
+    /*
+    int ffi_closure_inner_VFP
+    (
+        cif,        ; r0
+        fun,        ; r1
+        user_data,  ; r2
+        frame       ; r3
+    )
+    */
+
+    NESTED_ENTRY_FFI ffi_go_closure_VFP
+    stmdb   sp!, {r0-r3}                       ; save argument regs
+    ldr        r0, [ip, #4]                    ; load cif
+    ldr        r1, [ip, #8]                    ; load fun
+    mov        r2, ip                          ; load user_data
+    b  ffi_go_closure_VFP_0
+    NESTED_END ffi_go_closure_VFP
+
+    ; fake entry point exists only to generate exists only to 
+    ; generate .pdata for exception unwinding
+    ; r3:    closure
+    NESTED_ENTRY_FFI ffi_closure_VFP_fake
+    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind
+
+    ALTERNATE_ENTRY ffi_closure_VFP
+    ldmfd   sp!, {ip,r0}            ; restore fp (r0 is used for stack alignment)
+    stmdb   sp!, {r0-r3}            ; save argument regs
+
+    ldr     r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]    ; load cif
+    ldr     r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  ; load fun
+    ldr     r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  ; load user_data
+
+    ALTERNATE_ENTRY ffi_go_closure_VFP_0
+    add     ip, sp, #16             ; compute entry sp
+    sub     sp, sp, #32             ; save space for closure_frame->result
+    vstmdb  sp!, {d0-d7}            ; push closure_frame->vfp_space
+
+    mov     r3, sp                  ; save closure_frame
+    stmdb   sp!, {ip,lr}
+
+    bl      ffi_closure_inner_VFP
+
+    ; Load values returned in registers.
+    add     r2, sp, #64+8           ; load result
+    bl      ffi_closure_ret
+    ldmfd   sp!, {ip,lr}
+    mov     sp, ip                  ; restore stack pointer
+    mov     pc, lr
+    NESTED_END ffi_closure_VFP_fake
+
+/* Load values returned in registers for both closure entry points.
+   Note that we use LDM with SP in the register set.  This is deprecated
+   by ARM, but not yet unpredictable.  */
+
+    NESTED_ENTRY_FFI ffi_closure_ret
+    stmdb sp!, {fp,lr}
+
+    ALIGN 8
+    lsl     r0, #3
+    add     r0, r0, pc
+    add     r0, #8
+    mov     pc, r0
+
+E(ARM_TYPE_VFP_S, ffi_closure)
+    ALIGN 8
+    vldr s0, [r2]
+    b call_epilogue
+E(ARM_TYPE_VFP_D, ffi_closure)
+    ALIGN 8
+    vldr d0, [r2]
+    b call_epilogue
+E(ARM_TYPE_VFP_N, ffi_closure)
+    ALIGN 8
+    vldm r2, {d0-d3}
+    b call_epilogue
+E(ARM_TYPE_INT64, ffi_closure)
+    ALIGN 8
+    ldr    r1, [r2, #4]
+    nop
+E(ARM_TYPE_INT, ffi_closure)
+    ALIGN 8
+    ldr    r0, [r2]
+    b call_epilogue
+E(ARM_TYPE_VOID, ffi_closure)
+    ALIGN 8
+    b call_epilogue
+    nop
+E(ARM_TYPE_STRUCT, ffi_closure)
+    ALIGN 8
+    b call_epilogue
+call_epilogue
+    ldmfd sp!, {fp,pc}
+    NESTED_END ffi_closure_ret
+
+    AREA |.trampoline|, DATA, THUMB, READONLY
+    EXPORT |ffi_arm_trampoline|
+|ffi_arm_trampoline| DATA
+thisproc    adr    ip, thisproc
+            stmdb  sp!, {ip, r0}
+            ldr    pc, [pc, #0]
+            DCD    0
+            ;ENDP
+
+    END
\ No newline at end of file