Make ARMV7 compile with Xcode and add a CI job for it (#2537)
author     Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
           Thu, 2 Apr 2020 08:30:37 +0000 (10:30 +0200)
committer  GitHub <noreply@github.com>
           Thu, 2 Apr 2020 08:30:37 +0000 (10:30 +0200)
* Add an ARMV7 iOS build on Travis

* thread_local appears to be unavailable on ARMV7 iOS

* Add the -mno-thumb option to the ARMV7 iOS build so the assembler accepts DMB ISH

* Make the local labels in the macros of nrm2_vfpv3.S compatible with the Xcode assembler

.travis.yml
driver/level2/gemv_thread.c
kernel/arm/nrm2_vfpv3.S

diff --git a/.travis.yml b/.travis.yml
index 0f20aef..2d82f88 100644
@@ -180,6 +180,12 @@ matrix:
         - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
         - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
 
+    - <<: *test-macos
+      osx_image: xcode10.1
+      env:
+        - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
+        - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
+        - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
 # whitelist
 branches:
   only:
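For context on the new job above: the ARM kernels issue a DMB ISH memory barrier from inline assembly, and the Xcode toolchain only accepted that for this target with Thumb code generation disabled, hence the -mno-thumb in CFLAGS. A minimal sketch of such a barrier macro follows; the MB name and the publish() helper are illustrative, not taken verbatim from the OpenBLAS headers.

    /* Illustrative ARM memory-barrier macro; build with -arch armv7 -mno-thumb. */
    #define MB  __asm__ __volatile__ ("dmb ish" : : : "memory")

    static void publish(volatile int *ready)
    {
        MB;          /* order earlier stores before the flag becomes visible */
        *ready = 1;
    }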
diff --git a/driver/level2/gemv_thread.c b/driver/level2/gemv_thread.c
index d577403..0d8c6b0 100644
@@ -72,9 +72,9 @@
        defined __BORLANDC__ )
 #  define thread_local __declspec(thread) 
 /* note that ICC (linux) and Clang are covered by __GNUC__ */
-# elif defined __GNUC__ || \
+# elif (defined __GNUC__ || \
        defined __SUNPRO_C || \
-       defined __xlC__
+       defined __xlC__) && !defined(__APPLE__)
 #  define thread_local __thread
 # else
 # define UNSAFE
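The guard added above excludes Apple targets from the __GNUC__ branch, since __thread is not usable on ARMV7 iOS; the header then falls back to its UNSAFE path. A condensed sketch of the resulting selection logic, with a hypothetical per-thread counter for illustration:

    #if defined(_MSC_VER) || defined(__BORLANDC__)
    #  define thread_local __declspec(thread)
    #elif (defined(__GNUC__) || defined(__SUNPRO_C) || defined(__xlC__)) \
          && !defined(__APPLE__)
    #  define thread_local __thread          /* GCC/Clang/ICC/Sun/XL TLS keyword */
    #else
    #  define UNSAFE                          /* no TLS: take the serialized path */
    #endif

    #ifndef UNSAFE
    static thread_local int work_count;       /* one counter per thread */
    #else
    static int work_count;                    /* shared; protected by the caller */
    #endif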
diff --git a/kernel/arm/nrm2_vfpv3.S b/kernel/arm/nrm2_vfpv3.S
index 7be1e97..82ae5e8 100644
@@ -61,20 +61,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        vldmia.f64      X!,     { d4 }
        vcmpe.f64       d4, d6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_F1_NEXT_\@
+       beq             1f     /* KERNEL_F1_NEXT_\@ */
        vabs.f64        d4,  d4
        vcmpe.f64       d0,  d4                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f64      d2 , d4, d0                     // scale >= x ? x / scale
        vmlage.f64      d1 , d2 , d2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_F1_NEXT_\@
+       bge             1f     /* KERNEL_F1_NEXT_\@ */
        vdiv.f64        d2 , d0, d4                     // scale / x
        vmul.f64        d2 , d2, d2                     // ( scale / x ) * ( scale / x )
        vmul.f64        d3 , d1, d2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f64        d1 , d3, d7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f64        d0 , d4                         // scale = x
 
-KERNEL_F1_NEXT_\@:
+1:                             /* KERNEL_F1_NEXT_\@: */
 
 .endm
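Xcode's integrated assembler does not expand GNU as \@ label suffixes inside .macro bodies, so the branch targets above now use numeric local labels (1:, referenced as 1f forward and 1b backward), which both assemblers accept. A small standalone illustration of the numeric-label idiom in ARM inline assembly; the count_down() function is hypothetical, purely to show the 1:/1f/1b syntax:

    /* Loop using numeric local labels; branches reference them as 1b/2f. */
    static int count_down(int n)
    {
        int out;
        __asm__ volatile(
            "mov   %0, #0        \n\t"
            "1:                  \n\t"   /* local label, reusable elsewhere */
            "cmp   %1, #0        \n\t"
            "ble   2f            \n\t"   /* forward reference */
            "add   %0, %0, #1    \n\t"
            "sub   %1, %1, #1    \n\t"
            "b     1b            \n\t"   /* backward reference */
            "2:                  \n\t"
            : "=&r"(out), "+r"(n)
            :
            : "cc");
        return out;
    }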
 
@@ -124,20 +124,20 @@ KERNEL_S1_NEXT:
        vldmia.f32      X!,     { s4 }
        vcmpe.f32       s4, s6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_F1_NEXT_\@
+       beq             1f        /* KERNEL_F1_NEXT_\@ */
        vabs.f32        s4,  s4
        vcmpe.f32       s0,  s4                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f32      s2 , s4, s0                     // scale >= x ? x / scale
        vmlage.f32      s1 , s2 , s2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_F1_NEXT_\@
+       bge             1f        /* KERNEL_F1_NEXT_\@ */
        vdiv.f32        s2 , s0, s4                     // scale / x
        vmul.f32        s2 , s2, s2                     // ( scale / x ) * ( scale / x )
        vmul.f32        s3 , s1, s2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f32        s1 , s3, s7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f32        s0 , s4                         // scale = x
 
-KERNEL_F1_NEXT_\@:
+1:                                /* KERNEL_F1_NEXT_\@: */
 
 .endm
 
@@ -195,37 +195,37 @@ KERNEL_S1_NEXT:
 
        vcmpe.f64       d4, d6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_F1_NEXT_\@
+       beq             1f           /* KERNEL_F1_NEXT_\@ */
        vabs.f64        d4,  d4
        vcmpe.f64       d0,  d4                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f64      d2 , d4, d0                     // scale >= x ? x / scale
        vmlage.f64      d1 , d2 , d2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_F1_NEXT_\@
+       bge             1f           /* KERNEL_F1_NEXT_\@ */
        vdiv.f64        d2 , d0, d4                     // scale / x
        vmul.f64        d2 , d2, d2                     // ( scale / x ) * ( scale / x )
        vmul.f64        d3 , d1, d2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f64        d1 , d3, d7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f64        d0 , d4                         // scale = x
 
-KERNEL_F1_NEXT_\@:
+1:                                   /* KERNEL_F1_NEXT_\@: */
 
        vcmpe.f64       d5, d6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_F1_END_\@
+       beq             2f           /* KERNEL_F1_END_\@ */
        vabs.f64        d5,  d5
        vcmpe.f64       d0,  d5                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f64      d2 , d5, d0                     // scale >= x ? x / scale
        vmlage.f64      d1 , d2 , d2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_F1_END_\@
+       bge             2f           /* KERNEL_F1_END_\@ */
        vdiv.f64        d2 , d0, d5                     // scale / x
        vmul.f64        d2 , d2, d2                     // ( scale / x ) * ( scale / x )
        vmul.f64        d3 , d1, d2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f64        d1 , d3, d7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f64        d0 , d5                         // scale = x
 
-KERNEL_F1_END_\@:
+2:                                   /* KERNEL_F1_END_\@: */
 
 
 .endm
@@ -253,37 +253,37 @@ KERNEL_F1_END_\@:
 
        vcmpe.f64       d4, d6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_S1_NEXT_\@
+       beq             1f          /* KERNEL_S1_NEXT_\@ */
        vabs.f64        d4,  d4
        vcmpe.f64       d0,  d4                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f64      d2 , d4, d0                     // scale >= x ? x / scale
        vmlage.f64      d1 , d2 , d2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_S1_NEXT_\@
+       bge             1f          /* KERNEL_S1_NEXT_\@ */
        vdiv.f64        d2 , d0, d4                     // scale / x
        vmul.f64        d2 , d2, d2                     // ( scale / x ) * ( scale / x )
        vmul.f64        d3 , d1, d2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f64        d1 , d3, d7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f64        d0 , d4                         // scale = x
 
-KERNEL_S1_NEXT_\@:
+1:                                  /* KERNEL_S1_NEXT_\@: */
 
        vcmpe.f64       d5, d6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_S1_END_\@
+       beq             2f          /* KERNEL_S1_END_\@ */
        vabs.f64        d5,  d5
        vcmpe.f64       d0,  d5                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f64      d2 , d5, d0                     // scale >= x ? x / scale
        vmlage.f64      d1 , d2 , d2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_S1_END_\@
+       bge             2f          /* KERNEL_S1_END_\@ */
        vdiv.f64        d2 , d0, d5                     // scale / x
        vmul.f64        d2 , d2, d2                     // ( scale / x ) * ( scale / x )
        vmul.f64        d3 , d1, d2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f64        d1 , d3, d7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f64        d0 , d5                         // scale = x
 
-KERNEL_S1_END_\@:
+2:                                  /* KERNEL_S1_END_\@: */
 
        add     X, X, INC_X
 
@@ -298,37 +298,37 @@ KERNEL_S1_END_\@:
 
        vcmpe.f32       s4, s6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_F1_NEXT_\@
+       beq             1f         /* KERNEL_F1_NEXT_\@ */
        vabs.f32        s4,  s4
        vcmpe.f32       s0,  s4                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f32      s2 , s4, s0                     // scale >= x ? x / scale
        vmlage.f32      s1 , s2 , s2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_F1_NEXT_\@
+       bge             1f         /* KERNEL_F1_NEXT_\@ */
        vdiv.f32        s2 , s0, s4                     // scale / x
        vmul.f32        s2 , s2, s2                     // ( scale / x ) * ( scale / x )
        vmul.f32        s3 , s1, s2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f32        s1 , s3, s7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f32        s0 , s4                         // scale = x
 
-KERNEL_F1_NEXT_\@:
+1:                                 /* KERNEL_F1_NEXT_\@: */
 
        vcmpe.f32       s5, s6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_F1_END_\@
+       beq             2f         /* KERNEL_F1_END_\@ */
        vabs.f32        s5,  s5
        vcmpe.f32       s0,  s5                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f32      s2 , s5, s0                     // scale >= x ? x / scale
        vmlage.f32      s1 , s2 , s2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_F1_END_\@
+       bge             2f         /* KERNEL_F1_END_\@ */
        vdiv.f32        s2 , s0, s5                     // scale / x
        vmul.f32        s2 , s2, s2                     // ( scale / x ) * ( scale / x )
        vmul.f32        s3 , s1, s2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f32        s1 , s3, s7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f32        s0 , s5                         // scale = x
 
-KERNEL_F1_END_\@:
+2:                                 /* KERNEL_F1_END_\@: */
 
 
 .endm
@@ -354,37 +354,37 @@ KERNEL_F1_END_\@:
 
        vcmpe.f32       s4, s6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_S1_NEXT_\@
+       beq             1f          /* KERNEL_S1_NEXT_\@ */
        vabs.f32        s4,  s4
        vcmpe.f32       s0,  s4                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f32      s2 , s4, s0                     // scale >= x ? x / scale
        vmlage.f32      s1 , s2 , s2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_S1_NEXT_\@
+       bge             1f          /* KERNEL_S1_NEXT_\@ */
        vdiv.f32        s2 , s0, s4                     // scale / x
        vmul.f32        s2 , s2, s2                     // ( scale / x ) * ( scale / x )
        vmul.f32        s3 , s1, s2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f32        s1 , s3, s7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f32        s0 , s4                         // scale = x
 
-KERNEL_S1_NEXT_\@:
+1:                                  /* KERNEL_S1_NEXT_\@: */
 
        vcmpe.f32       s5, s6                          // compare with 0.0
        vmrs            APSR_nzcv, fpscr
-       beq             KERNEL_S1_END_\@
+       beq             2f          /* KERNEL_S1_END_\@ */
        vabs.f32        s5,  s5
        vcmpe.f32       s0,  s5                         // compare with scale
        vmrs            APSR_nzcv, fpscr
        vdivge.f32      s2 , s5, s0                     // scale >= x ? x / scale
        vmlage.f32      s1 , s2 , s2                    // ssq += ( x/scale ) * ( x/scale )
-       bge             KERNEL_S1_END_\@
+       bge             2f          /* KERNEL_S1_END_\@ */
        vdiv.f32        s2 , s0, s5                     // scale / x
        vmul.f32        s2 , s2, s2                     // ( scale / x ) * ( scale / x )
        vmul.f32        s3 , s1, s2                     // ssq * ( scale / x ) * ( scale / x )
        vadd.f32        s1 , s3, s7                     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
        vmov.f32        s0 , s5                         // scale = x
 
-KERNEL_S1_END_\@:
+2:                                  /* KERNEL_S1_END_\@: */
 
        add     X, X, INC_X
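The comments in the macros above describe the standard scaled sum-of-squares recurrence used by the reference *NRM2 routines. A scalar C sketch of the same update, illustrative only and not the kernel's actual code path:

    #include <math.h>

    /* Incorporate one element x into (scale, ssq); nrm2 = scale * sqrt(ssq). */
    static void ssq_update(double x, double *scale, double *ssq)
    {
        if (x == 0.0)
            return;                          /* beq 1f: skip zero elements   */
        x = fabs(x);
        if (*scale >= x) {
            double r = x / *scale;           /* x / scale                    */
            *ssq += r * r;                   /* ssq += (x/scale)^2           */
        } else {
            double r = *scale / x;           /* scale / x                    */
            *ssq = 1.0 + *ssq * r * r;       /* ssq = 1 + ssq*(scale/x)^2    */
            *scale = x;                      /* scale = x                    */
        }
    }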