From 7d64a0eec4c3311659e6874d8d9a0ea3e753d005 Mon Sep 17 00:00:00 2001
From: James Molloy
Date: Wed, 25 Jun 2014 11:46:24 +0000
Subject: [PATCH] [AArch32] Fix a stupid error in an architectural guard

Using < 8 instead of <= 8 meant that a bunch of vreinterprets were not
available on v8 AArch32. Simplify the guard to just !defined(__aarch64__)
while we're at it, and enable some v8 AArch32 testing.

llvm-svn: 211686
---
 clang/include/clang/Basic/arm_neon.td    |  2 +-
 clang/test/CodeGen/arm_neon_intrinsics.c | 82 +++++++++++++++++++-------------
 2 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 32e6f3a..4dba0f1 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -790,7 +790,7 @@ def VREINTERPRET
   : NoTestOpInst<"vreinterpret", "dd",
          "csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT> {
   let CartesianProductOfTypes = 1;
-  let ArchGuard = "__ARM_ARCH < 8";
+  let ArchGuard = "!defined(__aarch64__)";
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c
index 9ec3451..384ea33 100644
--- a/clang/test/CodeGen/arm_neon_intrinsics.c
+++ b/clang/test/CodeGen/arm_neon_intrinsics.c
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\
 // RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\
-// RUN: | FileCheck %s
+// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT
+// RUN: %clang_cc1 -triple armv8-linux-gnu \
+// RUN: -target-cpu cortex-a57 -mfloat-abi soft -ffreestanding -Os -S -o - %s\
+// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-A57
 
 // REQUIRES: long_tests
 
@@ -2645,7 +2648,7 @@ uint32x4_t test_vld1q_u32(uint32_t const * a) {
 }
 
 // CHECK: test_vld1q_u64
-// CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
 uint64x2_t test_vld1q_u64(uint64_t const * a) {
   return vld1q_u64(a);
 }
@@ -2669,7 +2672,7 @@ int32x4_t test_vld1q_s32(int32_t const * a) {
 }
 
 // CHECK: test_vld1q_s64
-// CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
 int64x2_t test_vld1q_s64(int64_t const * a) {
   return vld1q_s64(a);
 }
@@ -2717,7 +2720,7 @@ uint32x2_t test_vld1_u32(uint32_t const * a) {
 }
 
 // CHECK: test_vld1_u64
-// CHECK: vld1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK: vld1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
 uint64x1_t test_vld1_u64(uint64_t const * a) {
   return vld1_u64(a);
 }
@@ -2741,7 +2744,7 @@ int32x2_t test_vld1_s32(int32_t const * a) {
 }
 
 // CHECK: test_vld1_s64
-// CHECK: vld1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK: vld1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
 int64x1_t test_vld1_s64(int64_t const * a) {
   return vld1_s64(a);
 }
@@ -4177,8 +4180,9 @@ int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
 }
 
 // CHECK: test_vmla_f32
-// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: vadd.f32
+// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-SWIFT: vadd.f32
+// CHECK-A57: vmla.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
 float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
   return vmla_f32(a, b, c);
 }
@@ -4220,8 +4224,9 @@ int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
 }
 
 // CHECK: test_vmlaq_f32
-// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: vadd.f32
+// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-SWIFT: vadd.f32
+// CHECK-A57: vmla.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
 float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
   return vmlaq_f32(a, b, c);
 }
@@ -4357,8 +4362,9 @@ uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
 }
 
 // CHECK: test_vmla_lane_f32
-// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
-// CHECK: vadd.f32
+// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-SWIFT: vadd.f32
+// CHECK-A57: vmla.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
 float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
   return vmla_lane_f32(a, b, c, 1);
 }
@@ -4388,8 +4394,9 @@ uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
 }
 
 // CHECK: test_vmlaq_lane_f32
-// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
-// CHECK: vadd.f32
+// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-SWIFT: vadd.f32
+// CHECK-A57: vmla.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
 float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
   return vmlaq_lane_f32(a, b, c, 1);
 }
@@ -4420,8 +4427,9 @@ uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
 }
 
 // CHECK: test_vmla_n_f32
-// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: vadd.f32
+// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-SWIFT: vadd.f32
+// CHECK-A57: vmla.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
 float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
   return vmla_n_f32(a, b, c);
 }
@@ -4451,8 +4459,10 @@ uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
 }
 
 // CHECK: test_vmlaq_n_f32
-// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0]
-// CHECK: vadd.f32
+// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0]
+// CHECK-SWIFT: vadd.f32
+// CHECK-A57: vld1.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]},
+// CHECK-A57: vmla.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
 float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
   return vmlaq_n_f32(a, b, c);
 }
@@ -4477,8 +4487,9 @@ int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
 }
 
 // CHECK: test_vmls_f32
-// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: vsub.f32
+// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-SWIFT: vsub.f32
+// CHECK-A57: vmls.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
 float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
   return vmls_f32(a, b, c);
 }
@@ -4520,8 +4531,9 @@ int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
 }
 
 // CHECK: test_vmlsq_f32
-// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: vsub.f32
+// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-SWIFT: vsub.f32
+// CHECK-A57: vmls.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
 float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
   return vmlsq_f32(a, b, c);
 }
@@ -4657,8 +4669,9 @@ uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
 }
 
 // CHECK: test_vmls_lane_f32
-// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
-// CHECK: vsub.f32
+// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-SWIFT: vsub.f32
+// CHECK-A57: vmls.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
 float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
   return vmls_lane_f32(a, b, c, 1);
 }
@@ -4688,8 +4701,9 @@ uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
 }
 
 // CHECK: test_vmlsq_lane_f32
-// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
-// CHECK: vsub.f32
+// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-SWIFT: vsub.f32
+// CHECK-A57: vmls.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
 float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
   return vmlsq_lane_f32(a, b, c, 1);
 }
@@ -4720,8 +4734,9 @@ uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
 }
 
 // CHECK: test_vmls_n_f32
-// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: vsub.f32
+// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-SWIFT: vsub.f32
+// CHECK-A57: vmls.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
 float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
   return vmls_n_f32(a, b, c);
 }
@@ -4751,8 +4766,9 @@ uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
 }
 
 // CHECK: test_vmlsq_n_f32
-// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0]
-// CHECK: vsub.f32
+// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0]
+// CHECK-SWIFT: vsub.f32
+// CHECK-A57: vmls.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
 float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
   return vmlsq_n_f32(a, b, c);
 }
@@ -9883,7 +9899,7 @@ void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
 }
 
 // CHECK: test_vst1q_u64
-// CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
 void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
   vst1q_u64(a, b);
 }
@@ -9907,7 +9923,7 @@ void test_vst1q_s32(int32_t * a, int32x4_t b) {
 }
 
 // CHECK: test_vst1q_s64
-// CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
 void test_vst1q_s64(int64_t * a, int64x2_t b) {
   vst1q_s64(a, b);
 }
@@ -9955,7 +9971,7 @@ void test_vst1_u32(uint32_t * a, uint32x2_t b) {
 }
 
 // CHECK: test_vst1_u64
-// CHECK: vst1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK: vst1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
 void test_vst1_u64(uint64_t * a, uint64x1_t b) {
   vst1_u64(a, b);
 }
@@ -9979,7 +9995,7 @@ void test_vst1_s32(int32_t * a, int32x2_t b) {
 }
 
 // CHECK: test_vst1_s64
-// CHECK: vst1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK: vst1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
 void test_vst1_s64(int64_t * a, int64x1_t b) {
   vst1_s64(a, b);
 }
-- 
2.7.4
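
For context, a minimal standalone sketch of why the guard change matters (illustrative only, not part of the patch; the helper name is made up): on ARMv8-A AArch32 the ACLE predefines __ARM_ARCH to 8, so the old "__ARM_ARCH < 8" guard evaluated to false and hid the vreinterpret intrinsics, while __aarch64__ is only predefined for the 64-bit execution state, so "!defined(__aarch64__)" keeps them available on every 32-bit ARM target.

/* Sketch only, assuming a 32-bit ARMv8-A target such as armv8-linux-gnu,
 * where the compiler predefines __ARM_ARCH to 8 and does not define
 * __aarch64__.  The helper name below is hypothetical.                   */
#include <arm_neon.h>

#if __ARM_ARCH < 8
/* Old ArchGuard: this block is skipped on ARMv8 AArch32, which is what
 * hid the vreinterpret family before the fix.                           */
#endif

#if !defined(__aarch64__)
/* New ArchGuard: taken on every AArch32 target, including ARMv8. */
int32x2_t reinterpret_example(float32x2_t v) {
  return vreinterpret_s32_f32(v);  /* bit-level reinterpret, no conversion */
}
#endif

The new CHECK-A57 run line added above (armv8-linux-gnu, cortex-a57) exercises exactly this configuration.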