[Clang] Convert some tests to opaque pointers (NFC)

author Nikita Popov <npopov@redhat.com>

Mon, 12 Dec 2022 16:13:39 +0000 (17:13 +0100)

committer Nikita Popov <npopov@redhat.com>

Mon, 12 Dec 2022 16:24:40 +0000 (17:24 +0100)
author Nikita Popov <npopov@redhat.com>
Mon, 12 Dec 2022 16:13:39 +0000 (17:13 +0100)
committer Nikita Popov <npopov@redhat.com>
Mon, 12 Dec 2022 16:24:40 +0000 (17:24 +0100)
diff --git a/clang/test/CodeGen/aarch64-ls64.c b/clang/test/CodeGen/aarch64-ls64.c

index 408a084..341d54d 100644 (file)
--- a/clang/test/CodeGen/aarch64-ls64.c
+++ b/clang/test/CodeGen/aarch64-ls64.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-eabi -target-feature +ls64 -S -emit-llvm -x c %s -o - | FileCheck --check-prefixes=CHECK-C %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-eabi -target-feature +ls64 -S -emit-llvm -x c++ %s -o - | FileCheck --check-prefixes=CHECK-CXX %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64_be-eabi -target-feature +ls64 -S -emit-llvm -x c %s -o - | FileCheck  --check-prefixes=CHECK-C %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64_be-eabi -target-feature +ls64 -S -emit-llvm -x c++ %s -o - | FileCheck  --check-prefixes=CHECK-CXX %s
+// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +ls64 -S -emit-llvm -x c %s -o - | FileCheck --check-prefixes=CHECK-C %s
+// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +ls64 -S -emit-llvm -x c++ %s -o - | FileCheck --check-prefixes=CHECK-CXX %s
+// RUN: %clang_cc1 -triple aarch64_be-eabi -target-feature +ls64 -S -emit-llvm -x c %s -o - | FileCheck  --check-prefixes=CHECK-C %s
+// RUN: %clang_cc1 -triple aarch64_be-eabi -target-feature +ls64 -S -emit-llvm -x c++ %s -o - | FileCheck  --check-prefixes=CHECK-CXX %s
  
  #include <arm_acle.h>
  
@@ -16,80 +16,74 @@ data512_t val;
  void *addr;
  uint64_t status;
  
-// CHECK-C-LABEL: @test_ld64b(
+// CHECK-C-LABEL: define {{[^@]+}}@test_ld64b(
  // CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
  // CHECK-C-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
-// CHECK-C-NEXT:    [[TMP0:%.*]] = load i8*, i8** @addr, align 8
+// CHECK-C-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
  // CHECK-C-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]])
-// CHECK-C-NEXT:    store i8* [[TMP0]], i8** [[__ADDR_ADDR_I]], align 8, !noalias !2
-// CHECK-C-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[__ADDR_ADDR_I]], align 8, !noalias !2
-// CHECK-C-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_DATA512_T]], %struct.data512_t* [[TMP]], i32 0, i32 0
-// CHECK-C-NEXT:    [[ARRAYDECAY_I:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[VAL_I]], i64 0, i64 0
-// CHECK-C-NEXT:    [[TMP2:%.*]] = call { i64, i64, i64, i64, i64, i64, i64, i64 } @llvm.aarch64.ld64b(i8* [[TMP1]]), !noalias !2
+// CHECK-C-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8, !noalias !2
+// CHECK-C-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8, !noalias !2
+// CHECK-C-NEXT:    [[TMP2:%.*]] = call { i64, i64, i64, i64, i64, i64, i64, i64 } @llvm.aarch64.ld64b(ptr [[TMP1]]), !noalias !2
  // CHECK-C-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 0
-// CHECK-C-NEXT:    store i64 [[TMP3]], i64* [[ARRAYDECAY_I]], align 8, !alias.scope !2
-// CHECK-C-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 1
+// CHECK-C-NEXT:    store i64 [[TMP3]], ptr [[TMP]], align 8, !alias.scope !2
+// CHECK-C-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[TMP]], i32 1
  // CHECK-C-NEXT:    [[TMP5:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 1
-// CHECK-C-NEXT:    store i64 [[TMP5]], i64* [[TMP4]], align 8, !alias.scope !2
-// CHECK-C-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 2
+// CHECK-C-NEXT:    store i64 [[TMP5]], ptr [[TMP4]], align 8, !alias.scope !2
+// CHECK-C-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[TMP]], i32 2
  // CHECK-C-NEXT:    [[TMP7:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 2
-// CHECK-C-NEXT:    store i64 [[TMP7]], i64* [[TMP6]], align 8, !alias.scope !2
-// CHECK-C-NEXT:    [[TMP8:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 3
+// CHECK-C-NEXT:    store i64 [[TMP7]], ptr [[TMP6]], align 8, !alias.scope !2
+// CHECK-C-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP]], i32 3
  // CHECK-C-NEXT:    [[TMP9:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 3
-// CHECK-C-NEXT:    store i64 [[TMP9]], i64* [[TMP8]], align 8, !alias.scope !2
-// CHECK-C-NEXT:    [[TMP10:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 4
+// CHECK-C-NEXT:    store i64 [[TMP9]], ptr [[TMP8]], align 8, !alias.scope !2
+// CHECK-C-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[TMP]], i32 4
  // CHECK-C-NEXT:    [[TMP11:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 4
-// CHECK-C-NEXT:    store i64 [[TMP11]], i64* [[TMP10]], align 8, !alias.scope !2
-// CHECK-C-NEXT:    [[TMP12:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 5
+// CHECK-C-NEXT:    store i64 [[TMP11]], ptr [[TMP10]], align 8, !alias.scope !2
+// CHECK-C-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[TMP]], i32 5
  // CHECK-C-NEXT:    [[TMP13:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 5
-// CHECK-C-NEXT:    store i64 [[TMP13]], i64* [[TMP12]], align 8, !alias.scope !2
-// CHECK-C-NEXT:    [[TMP14:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 6
+// CHECK-C-NEXT:    store i64 [[TMP13]], ptr [[TMP12]], align 8, !alias.scope !2
+// CHECK-C-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[TMP]], i32 6
  // CHECK-C-NEXT:    [[TMP15:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 6
-// CHECK-C-NEXT:    store i64 [[TMP15]], i64* [[TMP14]], align 8, !alias.scope !2
-// CHECK-C-NEXT:    [[TMP16:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 7
+// CHECK-C-NEXT:    store i64 [[TMP15]], ptr [[TMP14]], align 8, !alias.scope !2
+// CHECK-C-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[TMP]], i32 7
  // CHECK-C-NEXT:    [[TMP17:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 7
-// CHECK-C-NEXT:    store i64 [[TMP17]], i64* [[TMP16]], align 8, !alias.scope !2
-// CHECK-C-NEXT:    [[TMP18:%.*]] = bitcast %struct.data512_t* [[TMP]] to i8*
-// CHECK-C-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 bitcast (%struct.data512_t* @val to i8*), i8* align 8 [[TMP18]], i64 64, i1 false)
+// CHECK-C-NEXT:    store i64 [[TMP17]], ptr [[TMP16]], align 8, !alias.scope !2
+// CHECK-C-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 @val, ptr align 8 [[TMP]], i64 64, i1 false)
  // CHECK-C-NEXT:    ret void
  //
-// CHECK-CXX-LABEL: @test_ld64b(
+// CHECK-CXX-LABEL: define {{[^@]+}}@test_ld64b(
  // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
  // CHECK-CXX-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load i8*, i8** @addr, align 8
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
  // CHECK-CXX-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]])
-// CHECK-CXX-NEXT:    store i8* [[TMP0]], i8** [[__ADDR_ADDR_I]], align 8, !noalias !2
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[__ADDR_ADDR_I]], align 8, !noalias !2
-// CHECK-CXX-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_DATA512_T]], %struct.data512_t* [[REF_TMP]], i32 0, i32 0
-// CHECK-CXX-NEXT:    [[ARRAYDECAY_I:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[VAL_I]], i64 0, i64 0
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = call { i64, i64, i64, i64, i64, i64, i64, i64 } @llvm.aarch64.ld64b(i8* [[TMP1]]), !noalias !2
+// CHECK-CXX-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8, !noalias !2
+// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8, !noalias !2
+// CHECK-CXX-NEXT:    [[TMP2:%.*]] = call { i64, i64, i64, i64, i64, i64, i64, i64 } @llvm.aarch64.ld64b(ptr [[TMP1]]), !noalias !2
  // CHECK-CXX-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 0
-// CHECK-CXX-NEXT:    store i64 [[TMP3]], i64* [[ARRAYDECAY_I]], align 8, !alias.scope !2
-// CHECK-CXX-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 1
+// CHECK-CXX-NEXT:    store i64 [[TMP3]], ptr [[REF_TMP]], align 8, !alias.scope !2
+// CHECK-CXX-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 1
  // CHECK-CXX-NEXT:    [[TMP5:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 1
-// CHECK-CXX-NEXT:    store i64 [[TMP5]], i64* [[TMP4]], align 8, !alias.scope !2
-// CHECK-CXX-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 2
+// CHECK-CXX-NEXT:    store i64 [[TMP5]], ptr [[TMP4]], align 8, !alias.scope !2
+// CHECK-CXX-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 2
  // CHECK-CXX-NEXT:    [[TMP7:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 2
-// CHECK-CXX-NEXT:    store i64 [[TMP7]], i64* [[TMP6]], align 8, !alias.scope !2
-// CHECK-CXX-NEXT:    [[TMP8:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 3
+// CHECK-CXX-NEXT:    store i64 [[TMP7]], ptr [[TMP6]], align 8, !alias.scope !2
+// CHECK-CXX-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 3
  // CHECK-CXX-NEXT:    [[TMP9:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 3
-// CHECK-CXX-NEXT:    store i64 [[TMP9]], i64* [[TMP8]], align 8, !alias.scope !2
-// CHECK-CXX-NEXT:    [[TMP10:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 4
+// CHECK-CXX-NEXT:    store i64 [[TMP9]], ptr [[TMP8]], align 8, !alias.scope !2
+// CHECK-CXX-NEXT:    [[TMP10:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 4
  // CHECK-CXX-NEXT:    [[TMP11:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 4
-// CHECK-CXX-NEXT:    store i64 [[TMP11]], i64* [[TMP10]], align 8, !alias.scope !2
-// CHECK-CXX-NEXT:    [[TMP12:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 5
+// CHECK-CXX-NEXT:    store i64 [[TMP11]], ptr [[TMP10]], align 8, !alias.scope !2
+// CHECK-CXX-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 5
  // CHECK-CXX-NEXT:    [[TMP13:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 5
-// CHECK-CXX-NEXT:    store i64 [[TMP13]], i64* [[TMP12]], align 8, !alias.scope !2
-// CHECK-CXX-NEXT:    [[TMP14:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 6
+// CHECK-CXX-NEXT:    store i64 [[TMP13]], ptr [[TMP12]], align 8, !alias.scope !2
+// CHECK-CXX-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 6
  // CHECK-CXX-NEXT:    [[TMP15:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 6
-// CHECK-CXX-NEXT:    store i64 [[TMP15]], i64* [[TMP14]], align 8, !alias.scope !2
-// CHECK-CXX-NEXT:    [[TMP16:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 7
+// CHECK-CXX-NEXT:    store i64 [[TMP15]], ptr [[TMP14]], align 8, !alias.scope !2
+// CHECK-CXX-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[REF_TMP]], i32 7
  // CHECK-CXX-NEXT:    [[TMP17:%.*]] = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } [[TMP2]], 7
-// CHECK-CXX-NEXT:    store i64 [[TMP17]], i64* [[TMP16]], align 8, !alias.scope !2
-// CHECK-CXX-NEXT:    [[TMP18:%.*]] = bitcast %struct.data512_t* [[REF_TMP]] to i8*
-// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 bitcast (%struct.data512_t* @val to i8*), i8* align 8 [[TMP18]], i64 64, i1 false)
+// CHECK-CXX-NEXT:    store i64 [[TMP17]], ptr [[TMP16]], align 8, !alias.scope !2
+// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 @val, ptr align 8 [[REF_TMP]], i64 64, i1 false)
  // CHECK-CXX-NEXT:    ret void
  //
  EXTERN_C void test_ld64b(void)
@@ -97,62 +91,56 @@ EXTERN_C void test_ld64b(void)
      val = __arm_ld64b(addr);
  }
  
-// CHECK-C-LABEL: @test_st64b(
+// CHECK-C-LABEL: define {{[^@]+}}@test_st64b(
  // CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
  // CHECK-C-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
-// CHECK-C-NEXT:    [[TMP0:%.*]] = load i8*, i8** @addr, align 8
-// CHECK-C-NEXT:    [[TMP1:%.*]] = bitcast %struct.data512_t* [[BYVAL_TEMP]] to i8*
-// CHECK-C-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 bitcast (%struct.data512_t* @val to i8*), i64 64, i1 false)
-// CHECK-C-NEXT:    store i8* [[TMP0]], i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-C-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-C-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_DATA512_T]], %struct.data512_t* [[BYVAL_TEMP]], i32 0, i32 0
-// CHECK-C-NEXT:    [[ARRAYDECAY_I:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[VAL_I]], i64 0, i64 0
-// CHECK-C-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYDECAY_I]], align 8
-// CHECK-C-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 1
-// CHECK-C-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK-C-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 2
-// CHECK-C-NEXT:    [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
-// CHECK-C-NEXT:    [[TMP8:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 3
-// CHECK-C-NEXT:    [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
-// CHECK-C-NEXT:    [[TMP10:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 4
-// CHECK-C-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8
-// CHECK-C-NEXT:    [[TMP12:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 5
-// CHECK-C-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
-// CHECK-C-NEXT:    [[TMP14:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 6
-// CHECK-C-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
-// CHECK-C-NEXT:    [[TMP16:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 7
-// CHECK-C-NEXT:    [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
-// CHECK-C-NEXT:    call void @llvm.aarch64.st64b(i8* [[TMP2]], i64 [[TMP3]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]])
+// CHECK-C-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
+// CHECK-C-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[BYVAL_TEMP]], ptr align 8 @val, i64 64, i1 false)
+// CHECK-C-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-C-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-C-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BYVAL_TEMP]], align 8
+// CHECK-C-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 1
+// CHECK-C-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
+// CHECK-C-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 2
+// CHECK-C-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// CHECK-C-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 3
+// CHECK-C-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
+// CHECK-C-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 4
+// CHECK-C-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK-C-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 5
+// CHECK-C-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
+// CHECK-C-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 6
+// CHECK-C-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
+// CHECK-C-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 7
+// CHECK-C-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
+// CHECK-C-NEXT:    call void @llvm.aarch64.st64b(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
  // CHECK-C-NEXT:    ret void
  //
-// CHECK-CXX-LABEL: @test_st64b(
+// CHECK-CXX-LABEL: define {{[^@]+}}@test_st64b(
  // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
  // CHECK-CXX-NEXT:    [[AGG_TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load i8*, i8** @addr, align 8
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = bitcast %struct.data512_t* [[AGG_TMP]] to i8*
-// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 bitcast (%struct.data512_t* @val to i8*), i64 64, i1 false)
-// CHECK-CXX-NEXT:    store i8* [[TMP0]], i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-CXX-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_DATA512_T]], %struct.data512_t* [[AGG_TMP]], i32 0, i32 0
-// CHECK-CXX-NEXT:    [[ARRAYDECAY_I:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[VAL_I]], i64 0, i64 0
-// CHECK-CXX-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYDECAY_I]], align 8
-// CHECK-CXX-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 1
-// CHECK-CXX-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK-CXX-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 2
-// CHECK-CXX-NEXT:    [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
-// CHECK-CXX-NEXT:    [[TMP8:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 3
-// CHECK-CXX-NEXT:    [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
-// CHECK-CXX-NEXT:    [[TMP10:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 4
-// CHECK-CXX-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8
-// CHECK-CXX-NEXT:    [[TMP12:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 5
-// CHECK-CXX-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
-// CHECK-CXX-NEXT:    [[TMP14:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 6
-// CHECK-CXX-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
-// CHECK-CXX-NEXT:    [[TMP16:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 7
-// CHECK-CXX-NEXT:    [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
-// CHECK-CXX-NEXT:    call void @llvm.aarch64.st64b(i8* [[TMP2]], i64 [[TMP3]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]])
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
+// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[AGG_TMP]], ptr align 8 @val, i64 64, i1 false)
+// CHECK-CXX-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-CXX-NEXT:    [[TMP2:%.*]] = load i64, ptr [[AGG_TMP]], align 8
+// CHECK-CXX-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 1
+// CHECK-CXX-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
+// CHECK-CXX-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 2
+// CHECK-CXX-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// CHECK-CXX-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 3
+// CHECK-CXX-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
+// CHECK-CXX-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 4
+// CHECK-CXX-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK-CXX-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 5
+// CHECK-CXX-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
+// CHECK-CXX-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 6
+// CHECK-CXX-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
+// CHECK-CXX-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 7
+// CHECK-CXX-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
+// CHECK-CXX-NEXT:    call void @llvm.aarch64.st64b(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
  // CHECK-CXX-NEXT:    ret void
  //
  EXTERN_C void test_st64b(void)
@@ -160,64 +148,58 @@ EXTERN_C void test_st64b(void)
      __arm_st64b(addr, val);
  }
  
-// CHECK-C-LABEL: @test_st64bv(
+// CHECK-C-LABEL: define {{[^@]+}}@test_st64bv(
  // CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
  // CHECK-C-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
-// CHECK-C-NEXT:    [[TMP0:%.*]] = load i8*, i8** @addr, align 8
-// CHECK-C-NEXT:    [[TMP1:%.*]] = bitcast %struct.data512_t* [[BYVAL_TEMP]] to i8*
-// CHECK-C-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 bitcast (%struct.data512_t* @val to i8*), i64 64, i1 false)
-// CHECK-C-NEXT:    store i8* [[TMP0]], i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-C-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-C-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_DATA512_T]], %struct.data512_t* [[BYVAL_TEMP]], i32 0, i32 0
-// CHECK-C-NEXT:    [[ARRAYDECAY_I:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[VAL_I]], i64 0, i64 0
-// CHECK-C-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYDECAY_I]], align 8
-// CHECK-C-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 1
-// CHECK-C-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK-C-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 2
-// CHECK-C-NEXT:    [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
-// CHECK-C-NEXT:    [[TMP8:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 3
-// CHECK-C-NEXT:    [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
-// CHECK-C-NEXT:    [[TMP10:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 4
-// CHECK-C-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8
-// CHECK-C-NEXT:    [[TMP12:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 5
-// CHECK-C-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
-// CHECK-C-NEXT:    [[TMP14:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 6
-// CHECK-C-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
-// CHECK-C-NEXT:    [[TMP16:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 7
-// CHECK-C-NEXT:    [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
-// CHECK-C-NEXT:    [[TMP18:%.*]] = call i64 @llvm.aarch64.st64bv(i8* [[TMP2]], i64 [[TMP3]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]])
-// CHECK-C-NEXT:    store i64 [[TMP18]], i64* @status, align 8
+// CHECK-C-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
+// CHECK-C-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[BYVAL_TEMP]], ptr align 8 @val, i64 64, i1 false)
+// CHECK-C-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-C-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-C-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BYVAL_TEMP]], align 8
+// CHECK-C-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 1
+// CHECK-C-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
+// CHECK-C-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 2
+// CHECK-C-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// CHECK-C-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 3
+// CHECK-C-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
+// CHECK-C-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 4
+// CHECK-C-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK-C-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 5
+// CHECK-C-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
+// CHECK-C-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 6
+// CHECK-C-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
+// CHECK-C-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 7
+// CHECK-C-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
+// CHECK-C-NEXT:    [[TMP17:%.*]] = call i64 @llvm.aarch64.st64bv(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
+// CHECK-C-NEXT:    store i64 [[TMP17]], ptr @status, align 8
  // CHECK-C-NEXT:    ret void
  //
-// CHECK-CXX-LABEL: @test_st64bv(
+// CHECK-CXX-LABEL: define {{[^@]+}}@test_st64bv(
  // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
  // CHECK-CXX-NEXT:    [[AGG_TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load i8*, i8** @addr, align 8
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = bitcast %struct.data512_t* [[AGG_TMP]] to i8*
-// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 bitcast (%struct.data512_t* @val to i8*), i64 64, i1 false)
-// CHECK-CXX-NEXT:    store i8* [[TMP0]], i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-CXX-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_DATA512_T]], %struct.data512_t* [[AGG_TMP]], i32 0, i32 0
-// CHECK-CXX-NEXT:    [[ARRAYDECAY_I:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[VAL_I]], i64 0, i64 0
-// CHECK-CXX-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYDECAY_I]], align 8
-// CHECK-CXX-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 1
-// CHECK-CXX-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK-CXX-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 2
-// CHECK-CXX-NEXT:    [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
-// CHECK-CXX-NEXT:    [[TMP8:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 3
-// CHECK-CXX-NEXT:    [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
-// CHECK-CXX-NEXT:    [[TMP10:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 4
-// CHECK-CXX-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8
-// CHECK-CXX-NEXT:    [[TMP12:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 5
-// CHECK-CXX-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
-// CHECK-CXX-NEXT:    [[TMP14:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 6
-// CHECK-CXX-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
-// CHECK-CXX-NEXT:    [[TMP16:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 7
-// CHECK-CXX-NEXT:    [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
-// CHECK-CXX-NEXT:    [[TMP18:%.*]] = call i64 @llvm.aarch64.st64bv(i8* [[TMP2]], i64 [[TMP3]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]])
-// CHECK-CXX-NEXT:    store i64 [[TMP18]], i64* @status, align 8
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
+// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[AGG_TMP]], ptr align 8 @val, i64 64, i1 false)
+// CHECK-CXX-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-CXX-NEXT:    [[TMP2:%.*]] = load i64, ptr [[AGG_TMP]], align 8
+// CHECK-CXX-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 1
+// CHECK-CXX-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
+// CHECK-CXX-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 2
+// CHECK-CXX-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// CHECK-CXX-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 3
+// CHECK-CXX-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
+// CHECK-CXX-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 4
+// CHECK-CXX-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK-CXX-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 5
+// CHECK-CXX-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
+// CHECK-CXX-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 6
+// CHECK-CXX-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
+// CHECK-CXX-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 7
+// CHECK-CXX-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
+// CHECK-CXX-NEXT:    [[TMP17:%.*]] = call i64 @llvm.aarch64.st64bv(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
+// CHECK-CXX-NEXT:    store i64 [[TMP17]], ptr @status, align 8
  // CHECK-CXX-NEXT:    ret void
  //
  EXTERN_C void test_st64bv(void)
@@ -225,64 +207,58 @@ EXTERN_C void test_st64bv(void)
      status = __arm_st64bv(addr, val);
  }
  
-// CHECK-C-LABEL: @test_st64bv0(
+// CHECK-C-LABEL: define {{[^@]+}}@test_st64bv0(
  // CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-C-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
  // CHECK-C-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
-// CHECK-C-NEXT:    [[TMP0:%.*]] = load i8*, i8** @addr, align 8
-// CHECK-C-NEXT:    [[TMP1:%.*]] = bitcast %struct.data512_t* [[BYVAL_TEMP]] to i8*
-// CHECK-C-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 bitcast (%struct.data512_t* @val to i8*), i64 64, i1 false)
-// CHECK-C-NEXT:    store i8* [[TMP0]], i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-C-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-C-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_DATA512_T]], %struct.data512_t* [[BYVAL_TEMP]], i32 0, i32 0
-// CHECK-C-NEXT:    [[ARRAYDECAY_I:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[VAL_I]], i64 0, i64 0
-// CHECK-C-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYDECAY_I]], align 8
-// CHECK-C-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 1
-// CHECK-C-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK-C-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 2
-// CHECK-C-NEXT:    [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
-// CHECK-C-NEXT:    [[TMP8:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 3
-// CHECK-C-NEXT:    [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
-// CHECK-C-NEXT:    [[TMP10:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 4
-// CHECK-C-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8
-// CHECK-C-NEXT:    [[TMP12:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 5
-// CHECK-C-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
-// CHECK-C-NEXT:    [[TMP14:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 6
-// CHECK-C-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
-// CHECK-C-NEXT:    [[TMP16:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 7
-// CHECK-C-NEXT:    [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
-// CHECK-C-NEXT:    [[TMP18:%.*]] = call i64 @llvm.aarch64.st64bv0(i8* [[TMP2]], i64 [[TMP3]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]])
-// CHECK-C-NEXT:    store i64 [[TMP18]], i64* @status, align 8
+// CHECK-C-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
+// CHECK-C-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[BYVAL_TEMP]], ptr align 8 @val, i64 64, i1 false)
+// CHECK-C-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-C-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-C-NEXT:    [[TMP2:%.*]] = load i64, ptr [[BYVAL_TEMP]], align 8
+// CHECK-C-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 1
+// CHECK-C-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
+// CHECK-C-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 2
+// CHECK-C-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// CHECK-C-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 3
+// CHECK-C-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
+// CHECK-C-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 4
+// CHECK-C-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK-C-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 5
+// CHECK-C-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
+// CHECK-C-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 6
+// CHECK-C-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
+// CHECK-C-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[BYVAL_TEMP]], i32 7
+// CHECK-C-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
+// CHECK-C-NEXT:    [[TMP17:%.*]] = call i64 @llvm.aarch64.st64bv0(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
+// CHECK-C-NEXT:    store i64 [[TMP17]], ptr @status, align 8
  // CHECK-C-NEXT:    ret void
  //
-// CHECK-CXX-LABEL: @test_st64bv0(
+// CHECK-CXX-LABEL: define {{[^@]+}}@test_st64bv0(
  // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-CXX-NEXT:    [[__ADDR_ADDR_I:%.*]] = alloca ptr, align 8
  // CHECK-CXX-NEXT:    [[AGG_TMP:%.*]] = alloca [[STRUCT_DATA512_T:%.*]], align 8
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load i8*, i8** @addr, align 8
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = bitcast %struct.data512_t* [[AGG_TMP]] to i8*
-// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 bitcast (%struct.data512_t* @val to i8*), i64 64, i1 false)
-// CHECK-CXX-NEXT:    store i8* [[TMP0]], i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[__ADDR_ADDR_I]], align 8
-// CHECK-CXX-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_DATA512_T]], %struct.data512_t* [[AGG_TMP]], i32 0, i32 0
-// CHECK-CXX-NEXT:    [[ARRAYDECAY_I:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[VAL_I]], i64 0, i64 0
-// CHECK-CXX-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYDECAY_I]], align 8
-// CHECK-CXX-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 1
-// CHECK-CXX-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK-CXX-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 2
-// CHECK-CXX-NEXT:    [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
-// CHECK-CXX-NEXT:    [[TMP8:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 3
-// CHECK-CXX-NEXT:    [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
-// CHECK-CXX-NEXT:    [[TMP10:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 4
-// CHECK-CXX-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8
-// CHECK-CXX-NEXT:    [[TMP12:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 5
-// CHECK-CXX-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
-// CHECK-CXX-NEXT:    [[TMP14:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 6
-// CHECK-CXX-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
-// CHECK-CXX-NEXT:    [[TMP16:%.*]] = getelementptr i64, i64* [[ARRAYDECAY_I]], i32 7
-// CHECK-CXX-NEXT:    [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
-// CHECK-CXX-NEXT:    [[TMP18:%.*]] = call i64 @llvm.aarch64.st64bv0(i8* [[TMP2]], i64 [[TMP3]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]])
-// CHECK-CXX-NEXT:    store i64 [[TMP18]], i64* @status, align 8
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load ptr, ptr @addr, align 8
+// CHECK-CXX-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[AGG_TMP]], ptr align 8 @val, i64 64, i1 false)
+// CHECK-CXX-NEXT:    store ptr [[TMP0]], ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__ADDR_ADDR_I]], align 8
+// CHECK-CXX-NEXT:    [[TMP2:%.*]] = load i64, ptr [[AGG_TMP]], align 8
+// CHECK-CXX-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 1
+// CHECK-CXX-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
+// CHECK-CXX-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 2
+// CHECK-CXX-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// CHECK-CXX-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 3
+// CHECK-CXX-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
+// CHECK-CXX-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 4
+// CHECK-CXX-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK-CXX-NEXT:    [[TMP11:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 5
+// CHECK-CXX-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
+// CHECK-CXX-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 6
+// CHECK-CXX-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
+// CHECK-CXX-NEXT:    [[TMP15:%.*]] = getelementptr i64, ptr [[AGG_TMP]], i32 7
+// CHECK-CXX-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP15]], align 8
+// CHECK-CXX-NEXT:    [[TMP17:%.*]] = call i64 @llvm.aarch64.st64bv0(ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]])
+// CHECK-CXX-NEXT:    store i64 [[TMP17]], ptr @status, align 8
  // CHECK-CXX-NEXT:    ret void
  //
  EXTERN_C void test_st64bv0(void)
diff --git a/clang/test/CodeGen/aarch64-neon-tbl.c b/clang/test/CodeGen/aarch64-neon-tbl.c

index d103484..264e7d7 100644 (file)
--- a/clang/test/CodeGen/aarch64-neon-tbl.c
+++ b/clang/test/CodeGen/aarch64-neon-tbl.c
@@ -1,5 +1,5 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
-// RUN: %clang_cc1 -no-opaque-pointers -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
  // RUN: -disable-O0-optnone  -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
  
  // REQUIRES: aarch64-registered-target || arm-registered-target
@@ -32,18 +32,14 @@ int8x8_t test_vqtbl1_s8(int8x16_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[A_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
  // CHECK-NEXT:    [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL13_I]]
@@ -57,18 +53,14 @@ int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL2_I]]
  //
@@ -81,21 +73,16 @@ int8x8_t test_vqtbl2_s8(int8x16x2_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[A_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
@@ -110,21 +97,16 @@ int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL3_I]]
  //
@@ -137,24 +119,18 @@ int8x8_t test_vqtbl3_s8(int8x16x3_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[A_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
@@ -169,24 +145,18 @@ int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL4_I]]
  //
@@ -209,18 +179,14 @@ int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBL2_I]]
  //
@@ -233,21 +199,16 @@ int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBL3_I]]
  //
@@ -260,24 +221,18 @@ int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBL4_I]]
  //
@@ -307,18 +262,14 @@ int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[B_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
  // CHECK-NEXT:    [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX13_I]]
@@ -332,21 +283,16 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[B_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
@@ -367,24 +313,18 @@ int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[B_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
  // CHECK-NEXT:    [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
@@ -409,18 +349,14 @@ int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX2_I]]
  //
@@ -433,21 +369,16 @@ int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX3_I]]
  //
@@ -460,24 +391,18 @@ int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX4_I]]
  //
@@ -500,18 +425,14 @@ int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, uint8x16_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBX2_I]]
  //
@@ -524,21 +445,16 @@ int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBX3_I]]
  //
@@ -551,24 +467,18 @@ int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBX4_I]]
  //
@@ -602,18 +512,14 @@ uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[A_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
  // CHECK-NEXT:    [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL13_I]]
@@ -627,18 +533,14 @@ uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL2_I]]
  //
@@ -651,21 +553,16 @@ uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[A_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
@@ -680,21 +577,16 @@ uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL3_I]]
  //
@@ -707,24 +599,18 @@ uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[A_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
@@ -739,24 +625,18 @@ uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL4_I]]
  //
@@ -779,18 +659,14 @@ uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBL2_I]]
  //
@@ -803,21 +679,16 @@ uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBL3_I]]
  //
@@ -830,24 +701,18 @@ uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBL4_I]]
  //
@@ -877,18 +742,14 @@ uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[B_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
  // CHECK-NEXT:    [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX13_I]]
@@ -902,21 +763,16 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[B_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
@@ -937,24 +793,18 @@ uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[B_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
  // CHECK-NEXT:    [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
@@ -979,18 +829,14 @@ uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX2_I]]
  //
@@ -1003,21 +849,16 @@ uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX3_I]]
  //
@@ -1030,24 +871,18 @@ uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX4_I]]
  //
@@ -1070,18 +905,14 @@ uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBX2_I]]
  //
@@ -1094,21 +925,16 @@ uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBX3_I]]
  //
@@ -1121,24 +947,18 @@ uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBX4_I]]
  //
@@ -1172,18 +992,14 @@ poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[A_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
  // CHECK-NEXT:    [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL13_I]]
@@ -1197,18 +1013,14 @@ poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL2_I]]
  //
@@ -1221,21 +1033,16 @@ poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[A_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
@@ -1250,21 +1057,16 @@ poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL3_I]]
  //
@@ -1277,24 +1079,18 @@ poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[A_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <8 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
@@ -1309,24 +1105,18 @@ poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBL4_I]]
  //
@@ -1349,18 +1139,14 @@ poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBL2_I]]
  //
@@ -1373,21 +1159,16 @@ poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBL3_I]]
  //
@@ -1400,24 +1181,18 @@ poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P0_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBL4_I]]
  //
@@ -1447,18 +1222,14 @@ poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[B_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [2 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
  // CHECK-NEXT:    [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX13_I]]
@@ -1472,21 +1243,16 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[B_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [3 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
  // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
@@ -1507,24 +1273,18 @@ poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[B_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <8 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <8 x i8>], ptr [[COERCE_DIVE1]], align 8
+// CHECK-NEXT:    store [4 x <8 x i8>] [[TMP0]], ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
  // CHECK-NEXT:    [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK-NEXT:    [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
@@ -1549,18 +1309,14 @@ poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX2_I]]
  //
@@ -1573,21 +1329,16 @@ poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX3_I]]
  //
@@ -1600,24 +1351,18 @@ poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
  // CHECK-NEXT:    ret <8 x i8> [[VTBX4_I]]
  //
@@ -1640,18 +1385,14 @@ poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [2 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [2 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
  // CHECK-NEXT:    [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBX2_I]]
  //
@@ -1664,21 +1405,16 @@ poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [3 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [3 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [3 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
  // CHECK-NEXT:    [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBX3_I]]
  //
@@ -1691,24 +1427,18 @@ poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
-// CHECK-NEXT:    [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
-// CHECK-NEXT:    [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
-// CHECK-NEXT:    [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
-// CHECK-NEXT:    [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
-// CHECK-NEXT:    [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x <16 x i8>] [[B_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load [4 x <16 x i8>], ptr [[COERCE_DIVE1]], align 16
+// CHECK-NEXT:    store [4 x <16 x i8>] [[TMP0]], ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__P1_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4_I]], align 16
+// CHECK-NEXT:    [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[__P1_I]], i64 0, i64 3
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6_I]], align 16
  // CHECK-NEXT:    [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
  // CHECK-NEXT:    ret <16 x i8> [[VTBX4_I]]
  //
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c

index 5873037..2173291 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,42 +15,36 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld1_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svld1_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svld1_bf16u10__SVBool_tPKu6__bf16(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
  //
  svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_bf16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast bfloat* [[TMP2]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svld1_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svld1_vnum_bf16u10__SVBool_tPKu6__bf16l(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast bfloat* [[TMP2]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
  //
  svbfloat16_t test_svld1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c

index 93280df..94d6f97 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,467 +14,401 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld1_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
-// CPP-CHECK-LABEL: @_Z13test_svld1_s8u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svld1_s8u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
  svint8_t test_svld1_s8(svbool_t pg, const int8_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_s8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld1_s16u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld1_s16u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
  svint16_t test_svld1_s16(svbool_t pg, const int16_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_s16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld1_s32u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld1_s32u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svld1_s32(svbool_t pg, const int32_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_s32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld1_s64u10__SVBool_tPKl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld1_s64u10__SVBool_tPKl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svld1_s64(svbool_t pg, const int64_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_s64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
-// CPP-CHECK-LABEL: @_Z13test_svld1_u8u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svld1_u8u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
  svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_u8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld1_u16u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld1_u16u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
  svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_u16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld1_u32u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld1_u32u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_u32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld1_u64u10__SVBool_tPKm(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld1_u64u10__SVBool_tPKm(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_u64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld1_f16u10__SVBool_tPKDh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld1_f16u10__SVBool_tPKDh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
  //
  svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_f16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld1_f32u10__SVBool_tPKf(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld1_f32u10__SVBool_tPKf(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_f32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld1_f64u10__SVBool_tPKd(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld1_f64u10__SVBool_tPKd(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base)
  {
    return SVE_ACLE_FUNC(svld1,_f64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svld1_vnum_s8u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svld1_vnum_s8u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
  svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_s8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s16u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld1_vnum_s16u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svint16_t test_svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_s16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s32u10__SVBool_tPKil(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld1_vnum_s32u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svint32_t test_svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_s32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s64u10__SVBool_tPKll(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld1_vnum_s64u10__SVBool_tPKll(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_s64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svld1_vnum_u8u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svld1_vnum_u8u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
  svuint8_t test_svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_u8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u16u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld1_vnum_u16u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svuint16_t test_svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_u16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u32u10__SVBool_tPKjl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld1_vnum_u32u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svuint32_t test_svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_u32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u64u10__SVBool_tPKml(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld1_vnum_u64u10__SVBool_tPKml(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_u64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f16u10__SVBool_tPKDhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld1_vnum_f16u10__SVBool_tPKDhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
  //
  svfloat16_t test_svld1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_f16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f32u10__SVBool_tPKfl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld1_vnum_f32u10__SVBool_tPKfl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
  svfloat32_t test_svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_f32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
-// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f64u10__SVBool_tPKdl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld1_vnum_f64u10__SVBool_tPKdl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
-// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
  svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld1_vnum,_f64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z29test_svld1_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z29test_svld1_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -484,13 +418,13 @@ svint32_t test_svld1_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z29test_svld1_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z29test_svld1_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -500,13 +434,13 @@ svint64_t test_svld1_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z29test_svld1_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z29test_svld1_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -516,13 +450,13 @@ svuint32_t test_svld1_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z29test_svld1_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z29test_svld1_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -532,13 +466,13 @@ svuint64_t test_svld1_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32base_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32base_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z29test_svld1_gather_u32base_f32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z29test_svld1_gather_u32base_f32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -548,13 +482,13 @@ svfloat32_t test_svld1_gather_u32base_f32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1_gather, _u32base, _f32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64base_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64base_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z29test_svld1_gather_u64base_f64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z29test_svld1_gather_u64base_f64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -564,205 +498,205 @@ svfloat64_t test_svld1_gather_u64base_f64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1_gather, _u64base, _f64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_s32offset_s32u10__SVBool_tPKiu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_s32offset_s32u10__SVBool_tPKiu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svld1_gather_s32offset_s32(svbool_t pg, const int32_t *base, svint32_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, s32, offset, _s32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_s64offset_s64u10__SVBool_tPKlu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_s64offset_s64u10__SVBool_tPKlu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svld1_gather_s64offset_s64(svbool_t pg, const int64_t *base, svint64_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, s64, offset, _s64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_s32offset_u32u10__SVBool_tPKju11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_s32offset_u32u10__SVBool_tPKju11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svld1_gather_s32offset_u32(svbool_t pg, const uint32_t *base, svint32_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, s32, offset, _u32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_s64offset_u64u10__SVBool_tPKmu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_s64offset_u64u10__SVBool_tPKmu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svld1_gather_s64offset_u64(svbool_t pg, const uint64_t *base, svint64_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, s64, offset, _u64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s32offset_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s32offset_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_s32offset_f32u10__SVBool_tPKfu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_s32offset_f32u10__SVBool_tPKfu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svld1_gather_s32offset_f32(svbool_t pg, const float32_t *base, svint32_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, s32, offset, _f32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s64offset_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s64offset_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_s64offset_f64u10__SVBool_tPKdu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_s64offset_f64u10__SVBool_tPKdu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svld1_gather_s64offset_f64(svbool_t pg, const float64_t *base, svint64_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, s64, offset, _f64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_u32offset_s32u10__SVBool_tPKiu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_u32offset_s32u10__SVBool_tPKiu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svld1_gather_u32offset_s32(svbool_t pg, const int32_t *base, svuint32_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, u32, offset, _s32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_u64offset_s64u10__SVBool_tPKlu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_u64offset_s64u10__SVBool_tPKlu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svld1_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, u64, offset, _s64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_u32offset_u32u10__SVBool_tPKju12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_u32offset_u32u10__SVBool_tPKju12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svld1_gather_u32offset_u32(svbool_t pg, const uint32_t *base, svuint32_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, u32, offset, _u32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_u64offset_u64u10__SVBool_tPKmu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_u64offset_u64u10__SVBool_tPKmu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svld1_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, u64, offset, _u64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32offset_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32offset_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_u32offset_f32u10__SVBool_tPKfu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_u32offset_f32u10__SVBool_tPKfu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svld1_gather_u32offset_f32(svbool_t pg, const float32_t *base, svuint32_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, u32, offset, _f32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64offset_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64offset_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1_gather_u64offset_f64u10__SVBool_tPKdu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1_gather_u64offset_f64u10__SVBool_tPKdu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svld1_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t offsets) {
    return SVE_ACLE_FUNC(svld1_gather_, u64, offset, _f64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z36test_svld1_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svld1_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -772,13 +706,13 @@ svint32_t test_svld1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, in
    return SVE_ACLE_FUNC(svld1_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z36test_svld1_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svld1_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -788,13 +722,13 @@ svint64_t test_svld1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, in
    return SVE_ACLE_FUNC(svld1_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z36test_svld1_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svld1_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -804,13 +738,13 @@ svuint32_t test_svld1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, i
    return SVE_ACLE_FUNC(svld1_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z36test_svld1_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svld1_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -820,13 +754,13 @@ svuint64_t test_svld1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, i
    return SVE_ACLE_FUNC(svld1_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32base_offset_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32base_offset_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z36test_svld1_gather_u32base_offset_f32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svld1_gather_u32base_offset_f32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -836,13 +770,13 @@ svfloat32_t test_svld1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1_gather, _u32base, _offset_f32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64base_offset_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64base_offset_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z36test_svld1_gather_u64base_offset_f64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svld1_gather_u64base_offset_f64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -852,206 +786,206 @@ svfloat64_t test_svld1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1_gather, _u64base, _offset_f64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_s32index_s32u10__SVBool_tPKiu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_s32index_s32u10__SVBool_tPKiu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svld1_gather_s32index_s32(svbool_t pg, const int32_t *base, svint32_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, s32, index, _s32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_s64index_s64u10__SVBool_tPKlu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_s64index_s64u10__SVBool_tPKlu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svld1_gather_s64index_s64(svbool_t pg, const int64_t *base, svint64_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, s64, index, _s64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_s32index_u32u10__SVBool_tPKju11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_s32index_u32u10__SVBool_tPKju11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svld1_gather_s32index_u32(svbool_t pg, const uint32_t *base, svint32_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, s32, index, _u32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_s64index_u64u10__SVBool_tPKmu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_s64index_u64u10__SVBool_tPKmu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svld1_gather_s64index_u64(svbool_t pg, const uint64_t *base, svint64_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, s64, index, _u64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s32index_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s32index_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_s32index_f32u10__SVBool_tPKfu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_s32index_f32u10__SVBool_tPKfu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svld1_gather_s32index_f32(svbool_t pg, const float32_t *base, svint32_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, s32, index, _f32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_s64index_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_s64index_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_s64index_f64u10__SVBool_tPKdu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_s64index_f64u10__SVBool_tPKdu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svld1_gather_s64index_f64(svbool_t pg, const float64_t *base, svint64_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, s64, index, _f64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_u32index_s32u10__SVBool_tPKiu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_u32index_s32u10__SVBool_tPKiu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svld1_gather_u32index_s32(svbool_t pg, const int32_t *base, svuint32_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, u32, index, _s32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_u64index_s64u10__SVBool_tPKlu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_u64index_s64u10__SVBool_tPKlu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svld1_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, u64, index, _s64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_u32index_u32u10__SVBool_tPKju12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_u32index_u32u10__SVBool_tPKju12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svld1_gather_u32index_u32(svbool_t pg, const uint32_t *base, svuint32_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, u32, index, _u32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_u64index_u64u10__SVBool_tPKmu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_u64index_u64u10__SVBool_tPKmu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svld1_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, u64, index, _u64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32index_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32index_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_u32index_f32u10__SVBool_tPKfu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_u32index_f32u10__SVBool_tPKfu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svld1_gather_u32index_f32(svbool_t pg, const float32_t *base, svuint32_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, u32, index, _f32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64index_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64index_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z30test_svld1_gather_u64index_f64u10__SVBool_tPKdu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svld1_gather_u64index_f64u10__SVBool_tPKdu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svld1_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t indices) {
    return SVE_ACLE_FUNC(svld1_gather_, u64, index, _f64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32base_index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32base_index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svld1_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svld1_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -1062,14 +996,14 @@ svint32_t test_svld1_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int
    return SVE_ACLE_FUNC(svld1_gather, _u32base, _index_s32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svld1_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svld1_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
@@ -1080,14 +1014,14 @@ svint64_t test_svld1_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int
    return SVE_ACLE_FUNC(svld1_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32base_index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32base_index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svld1_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svld1_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -1098,14 +1032,14 @@ svuint32_t test_svld1_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, in
    return SVE_ACLE_FUNC(svld1_gather, _u32base, _index_u32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svld1_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svld1_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
@@ -1116,14 +1050,14 @@ svuint64_t test_svld1_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, in
    return SVE_ACLE_FUNC(svld1_gather, _u64base, _index_u64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u32base_index_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u32base_index_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svld1_gather_u32base_index_f32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svld1_gather_u32base_index_f32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -1134,14 +1068,14 @@ svfloat32_t test_svld1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases, i
    return SVE_ACLE_FUNC(svld1_gather, _u32base, _index_f32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1_gather_u64base_index_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1_gather_u64base_index_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svld1_gather_u64base_index_f64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svld1_gather_u64base_index_f64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c

index 9d45dbd..1ffce74 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,290 +14,254 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld1sb_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sb_s16u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sb_s16u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svint16_t test_svld1sb_s16(svbool_t pg, const int8_t *base)
  {
    return svld1sb_s16(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sb_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sb_s32u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sb_s32u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svint32_t test_svld1sb_s32(svbool_t pg, const int8_t *base)
  {
    return svld1sb_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sb_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sb_s64u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sb_s64u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svld1sb_s64(svbool_t pg, const int8_t *base)
  {
    return svld1sb_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sb_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sb_u16u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sb_u16u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svuint16_t test_svld1sb_u16(svbool_t pg, const int8_t *base)
  {
    return svld1sb_u16(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sb_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sb_u32u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sb_u32u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svuint32_t test_svld1sb_u32(svbool_t pg, const int8_t *base)
  {
    return svld1sb_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sb_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sb_u64u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sb_u64u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base)
  {
    return svld1sb_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sb_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_s16u10__SVBool_tPKal(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sb_vnum_s16u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svld1sb_vnum_s16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sb_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_s32u10__SVBool_tPKal(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sb_vnum_s32u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svld1sb_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sb_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_s64u10__SVBool_tPKal(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sb_vnum_s64u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svld1sb_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sb_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_u16u10__SVBool_tPKal(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sb_vnum_u16u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svld1sb_vnum_u16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sb_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_u32u10__SVBool_tPKal(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sb_vnum_u32u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svld1sb_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sb_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_u64u10__SVBool_tPKal(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sb_vnum_u64u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svld1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svld1sb_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sb_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sb_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -308,14 +272,14 @@ svint32_t test_svld1sb_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1sb_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sb_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sb_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -326,14 +290,14 @@ svint64_t test_svld1sb_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1sb_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sb_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sb_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -344,14 +308,14 @@ svuint32_t test_svld1sb_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1sb_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sb_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sb_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -362,17 +326,17 @@ svuint64_t test_svld1sb_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1sb_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sb_gather_s32offset_s32u10__SVBool_tPKau11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sb_gather_s32offset_s32u10__SVBool_tPKau11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -380,17 +344,17 @@ svint32_t test_svld1sb_gather_s32offset_s32(svbool_t pg, const int8_t *base, svi
    return SVE_ACLE_FUNC(svld1sb_gather_, s32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sb_gather_s64offset_s64u10__SVBool_tPKau11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sb_gather_s64offset_s64u10__SVBool_tPKau11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -398,17 +362,17 @@ svint64_t test_svld1sb_gather_s64offset_s64(svbool_t pg, const int8_t *base, svi
    return SVE_ACLE_FUNC(svld1sb_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sb_gather_s32offset_u32u10__SVBool_tPKau11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sb_gather_s32offset_u32u10__SVBool_tPKau11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -416,17 +380,17 @@ svuint32_t test_svld1sb_gather_s32offset_u32(svbool_t pg, const int8_t *base, sv
    return SVE_ACLE_FUNC(svld1sb_gather_, s32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sb_gather_s64offset_u64u10__SVBool_tPKau11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sb_gather_s64offset_u64u10__SVBool_tPKau11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -434,17 +398,17 @@ svuint64_t test_svld1sb_gather_s64offset_u64(svbool_t pg, const int8_t *base, sv
    return SVE_ACLE_FUNC(svld1sb_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sb_gather_u32offset_s32u10__SVBool_tPKau12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sb_gather_u32offset_s32u10__SVBool_tPKau12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -452,17 +416,17 @@ svint32_t test_svld1sb_gather_u32offset_s32(svbool_t pg, const int8_t *base, svu
    return SVE_ACLE_FUNC(svld1sb_gather_, u32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sb_gather_u64offset_s64u10__SVBool_tPKau12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sb_gather_u64offset_s64u10__SVBool_tPKau12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -470,17 +434,17 @@ svint64_t test_svld1sb_gather_u64offset_s64(svbool_t pg, const int8_t *base, svu
    return SVE_ACLE_FUNC(svld1sb_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sb_gather_u32offset_u32u10__SVBool_tPKau12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sb_gather_u32offset_u32u10__SVBool_tPKau12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -488,17 +452,17 @@ svuint32_t test_svld1sb_gather_u32offset_u32(svbool_t pg, const int8_t *base, sv
    return SVE_ACLE_FUNC(svld1sb_gather_, u32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sb_gather_u64offset_u64u10__SVBool_tPKau12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sb_gather_u64offset_u64u10__SVBool_tPKau12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -506,14 +470,14 @@ svuint64_t test_svld1sb_gather_u64offset_u64(svbool_t pg, const int8_t *base, sv
    return SVE_ACLE_FUNC(svld1sb_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sb_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sb_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -524,14 +488,14 @@ svint32_t test_svld1sb_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1sb_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sb_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sb_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -542,14 +506,14 @@ svint64_t test_svld1sb_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1sb_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sb_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sb_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -560,14 +524,14 @@ svuint32_t test_svld1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1sb_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1sb_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sb_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sb_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sb_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c

index 0e110d8..1b89ecf 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,198 +14,174 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld1sh_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sh_s32u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sh_s32u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svint32_t test_svld1sh_s32(svbool_t pg, const int16_t *base)
  {
    return svld1sh_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sh_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sh_s64u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sh_s64u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svld1sh_s64(svbool_t pg, const int16_t *base)
  {
    return svld1sh_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sh_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sh_u32u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sh_u32u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svuint32_t test_svld1sh_u32(svbool_t pg, const int16_t *base)
  {
    return svld1sh_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sh_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sh_u64u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sh_u64u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base)
  {
    return svld1sh_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sh_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_s32u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sh_vnum_s32u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svld1sh_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sh_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_s64u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sh_vnum_s64u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svld1sh_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sh_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_u32u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sh_vnum_u32u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svld1sh_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sh_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_u64u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sh_vnum_u64u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svld1sh_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sh_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sh_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -216,14 +192,14 @@ svint32_t test_svld1sh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sh_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sh_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -234,14 +210,14 @@ svint64_t test_svld1sh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sh_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sh_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -252,14 +228,14 @@ svuint32_t test_svld1sh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sh_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sh_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -270,17 +246,17 @@ svuint64_t test_svld1sh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sh_gather_s32offset_s32u10__SVBool_tPKsu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sh_gather_s32offset_s32u10__SVBool_tPKsu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -288,17 +264,17 @@ svint32_t test_svld1sh_gather_s32offset_s32(svbool_t pg, const int16_t *base, sv
    return SVE_ACLE_FUNC(svld1sh_gather_, s32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sh_gather_s64offset_s64u10__SVBool_tPKsu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sh_gather_s64offset_s64u10__SVBool_tPKsu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -306,17 +282,17 @@ svint64_t test_svld1sh_gather_s64offset_s64(svbool_t pg, const int16_t *base, sv
    return SVE_ACLE_FUNC(svld1sh_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sh_gather_s32offset_u32u10__SVBool_tPKsu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sh_gather_s32offset_u32u10__SVBool_tPKsu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -324,17 +300,17 @@ svuint32_t test_svld1sh_gather_s32offset_u32(svbool_t pg, const int16_t *base, s
    return SVE_ACLE_FUNC(svld1sh_gather_, s32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sh_gather_s64offset_u64u10__SVBool_tPKsu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sh_gather_s64offset_u64u10__SVBool_tPKsu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -342,17 +318,17 @@ svuint64_t test_svld1sh_gather_s64offset_u64(svbool_t pg, const int16_t *base, s
    return SVE_ACLE_FUNC(svld1sh_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sh_gather_u32offset_s32u10__SVBool_tPKsu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sh_gather_u32offset_s32u10__SVBool_tPKsu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -360,17 +336,17 @@ svint32_t test_svld1sh_gather_u32offset_s32(svbool_t pg, const int16_t *base, sv
    return SVE_ACLE_FUNC(svld1sh_gather_, u32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sh_gather_u64offset_s64u10__SVBool_tPKsu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sh_gather_u64offset_s64u10__SVBool_tPKsu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -378,17 +354,17 @@ svint64_t test_svld1sh_gather_u64offset_s64(svbool_t pg, const int16_t *base, sv
    return SVE_ACLE_FUNC(svld1sh_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sh_gather_u32offset_u32u10__SVBool_tPKsu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sh_gather_u32offset_u32u10__SVBool_tPKsu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -396,17 +372,17 @@ svuint32_t test_svld1sh_gather_u32offset_u32(svbool_t pg, const int16_t *base, s
    return SVE_ACLE_FUNC(svld1sh_gather_, u32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sh_gather_u64offset_u64u10__SVBool_tPKsu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sh_gather_u64offset_u64u10__SVBool_tPKsu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -414,14 +390,14 @@ svuint64_t test_svld1sh_gather_u64offset_u64(svbool_t pg, const int16_t *base, s
    return SVE_ACLE_FUNC(svld1sh_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -432,14 +408,14 @@ svint32_t test_svld1sh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -450,14 +426,14 @@ svint64_t test_svld1sh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -468,14 +444,14 @@ svuint32_t test_svld1sh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -486,17 +462,17 @@ svuint64_t test_svld1sh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_s32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_s32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sh_gather_s32index_s32u10__SVBool_tPKsu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sh_gather_s32index_s32u10__SVBool_tPKsu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -504,17 +480,17 @@ svint32_t test_svld1sh_gather_s32index_s32(svbool_t pg, const int16_t *base, svi
    return SVE_ACLE_FUNC(svld1sh_gather_, s32, index_s32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sh_gather_s64index_s64u10__SVBool_tPKsu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sh_gather_s64index_s64u10__SVBool_tPKsu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -522,17 +498,17 @@ svint64_t test_svld1sh_gather_s64index_s64(svbool_t pg, const int16_t *base, svi
    return SVE_ACLE_FUNC(svld1sh_gather_, s64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_s32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_s32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sh_gather_s32index_u32u10__SVBool_tPKsu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sh_gather_s32index_u32u10__SVBool_tPKsu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -540,17 +516,17 @@ svuint32_t test_svld1sh_gather_s32index_u32(svbool_t pg, const int16_t *base, sv
    return SVE_ACLE_FUNC(svld1sh_gather_, s32, index_u32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sh_gather_s64index_u64u10__SVBool_tPKsu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sh_gather_s64index_u64u10__SVBool_tPKsu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -558,17 +534,17 @@ svuint64_t test_svld1sh_gather_s64index_u64(svbool_t pg, const int16_t *base, sv
    return SVE_ACLE_FUNC(svld1sh_gather_, s64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sh_gather_u32index_s32u10__SVBool_tPKsu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sh_gather_u32index_s32u10__SVBool_tPKsu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -576,17 +552,17 @@ svint32_t test_svld1sh_gather_u32index_s32(svbool_t pg, const int16_t *base, svu
    return SVE_ACLE_FUNC(svld1sh_gather_, u32, index_s32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sh_gather_u64index_s64u10__SVBool_tPKsu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sh_gather_u64index_s64u10__SVBool_tPKsu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -594,17 +570,17 @@ svint64_t test_svld1sh_gather_u64index_s64(svbool_t pg, const int16_t *base, svu
    return SVE_ACLE_FUNC(svld1sh_gather_, u64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sh_gather_u32index_u32u10__SVBool_tPKsu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sh_gather_u32index_u32u10__SVBool_tPKsu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -612,17 +588,17 @@ svuint32_t test_svld1sh_gather_u32index_u32(svbool_t pg, const int16_t *base, sv
    return SVE_ACLE_FUNC(svld1sh_gather_, u32, index_u32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sh_gather_u64index_u64u10__SVBool_tPKsu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sh_gather_u64index_u64u10__SVBool_tPKsu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -630,7 +606,7 @@ svuint64_t test_svld1sh_gather_u64index_u64(svbool_t pg, const int16_t *base, sv
    return SVE_ACLE_FUNC(svld1sh_gather_, u64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32base_index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32base_index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -638,7 +614,7 @@ svuint64_t test_svld1sh_gather_u64index_u64(svbool_t pg, const int16_t *base, sv
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1sh_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1sh_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -650,7 +626,7 @@ svint32_t test_svld1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, i
    return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _index_s32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -658,7 +634,7 @@ svint32_t test_svld1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, i
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1sh_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1sh_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -670,7 +646,7 @@ svint64_t test_svld1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
    return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u32base_index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u32base_index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -678,7 +654,7 @@ svint64_t test_svld1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1sh_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1sh_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -690,7 +666,7 @@ svuint32_t test_svld1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1sh_gather, _u32base, _index_u32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1sh_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sh_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -698,7 +674,7 @@ svuint32_t test_svld1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases,
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1sh_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1sh_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c

index 2c16b1e..bd5aa89 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,106 +14,94 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld1sw_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sw_s64u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sw_s64u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svld1sw_s64(svbool_t pg, const int32_t *base)
  {
    return svld1sw_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sw_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1sw_u64u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1sw_u64u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svld1sw_u64(svbool_t pg, const int32_t *base)
  {
    return svld1sw_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1sw_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1sw_vnum_s64u10__SVBool_tPKil(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sw_vnum_s64u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return svld1sw_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sw_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1sw_vnum_u64u10__SVBool_tPKil(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1sw_vnum_u64u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svld1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return svld1sw_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sw_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sw_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -124,14 +112,14 @@ svint64_t test_svld1sw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1sw_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1sw_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -142,17 +130,17 @@ svuint64_t test_svld1sw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sw_gather_s64offset_s64u10__SVBool_tPKiu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sw_gather_s64offset_s64u10__SVBool_tPKiu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -160,17 +148,17 @@ svint64_t test_svld1sw_gather_s64offset_s64(svbool_t pg, const int32_t *base, sv
    return SVE_ACLE_FUNC(svld1sw_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sw_gather_s64offset_u64u10__SVBool_tPKiu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sw_gather_s64offset_u64u10__SVBool_tPKiu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -178,17 +166,17 @@ svuint64_t test_svld1sw_gather_s64offset_u64(svbool_t pg, const int32_t *base, s
    return SVE_ACLE_FUNC(svld1sw_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sw_gather_u64offset_s64u10__SVBool_tPKiu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sw_gather_u64offset_s64u10__SVBool_tPKiu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -196,17 +184,17 @@ svint64_t test_svld1sw_gather_u64offset_s64(svbool_t pg, const int32_t *base, sv
    return SVE_ACLE_FUNC(svld1sw_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1sw_gather_u64offset_u64u10__SVBool_tPKiu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1sw_gather_u64offset_u64u10__SVBool_tPKiu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -214,14 +202,14 @@ svuint64_t test_svld1sw_gather_u64offset_u64(svbool_t pg, const int32_t *base, s
    return SVE_ACLE_FUNC(svld1sw_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -232,14 +220,14 @@ svint64_t test_svld1sw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1sw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1sw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -250,17 +238,17 @@ svuint64_t test_svld1sw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sw_gather_s64index_s64u10__SVBool_tPKiu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sw_gather_s64index_s64u10__SVBool_tPKiu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -268,17 +256,17 @@ svint64_t test_svld1sw_gather_s64index_s64(svbool_t pg, const int32_t *base, svi
    return SVE_ACLE_FUNC(svld1sw_gather_, s64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sw_gather_s64index_u64u10__SVBool_tPKiu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sw_gather_s64index_u64u10__SVBool_tPKiu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -286,17 +274,17 @@ svuint64_t test_svld1sw_gather_s64index_u64(svbool_t pg, const int32_t *base, sv
    return SVE_ACLE_FUNC(svld1sw_gather_, s64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sw_gather_u64index_s64u10__SVBool_tPKiu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sw_gather_u64index_s64u10__SVBool_tPKiu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -304,17 +292,17 @@ svint64_t test_svld1sw_gather_u64index_s64(svbool_t pg, const int32_t *base, svu
    return SVE_ACLE_FUNC(svld1sw_gather_, u64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1sw_gather_u64index_u64u10__SVBool_tPKiu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1sw_gather_u64index_u64u10__SVBool_tPKiu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -322,7 +310,7 @@ svuint64_t test_svld1sw_gather_u64index_u64(svbool_t pg, const int32_t *base, sv
    return SVE_ACLE_FUNC(svld1sw_gather_, u64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -330,7 +318,7 @@ svuint64_t test_svld1sw_gather_u64index_u64(svbool_t pg, const int32_t *base, sv
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1sw_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1sw_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -342,7 +330,7 @@ svint64_t test_svld1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
    return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1sw_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1sw_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -350,7 +338,7 @@ svint64_t test_svld1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1sw_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1sw_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c

index 93b2346..03e4a59 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,290 +14,254 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld1ub_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1ub_s16u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1ub_s16u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svint16_t test_svld1ub_s16(svbool_t pg, const uint8_t *base)
  {
    return svld1ub_s16(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1ub_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1ub_s32u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1ub_s32u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svint32_t test_svld1ub_s32(svbool_t pg, const uint8_t *base)
  {
    return svld1ub_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1ub_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1ub_s64u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1ub_s64u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svld1ub_s64(svbool_t pg, const uint8_t *base)
  {
    return svld1ub_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1ub_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1ub_u16u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1ub_u16u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svuint16_t test_svld1ub_u16(svbool_t pg, const uint8_t *base)
  {
    return svld1ub_u16(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1ub_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1ub_u32u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1ub_u32u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svuint32_t test_svld1ub_u32(svbool_t pg, const uint8_t *base)
  {
    return svld1ub_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1ub_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1ub_u64u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1ub_u64u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svld1ub_u64(svbool_t pg, const uint8_t *base)
  {
    return svld1ub_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1ub_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_s16u10__SVBool_tPKhl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1ub_vnum_s16u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svint16_t test_svld1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svld1ub_vnum_s16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1ub_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_s32u10__SVBool_tPKhl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1ub_vnum_s32u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svld1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svld1ub_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1ub_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_s64u10__SVBool_tPKhl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1ub_vnum_s64u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svld1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svld1ub_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1ub_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_u16u10__SVBool_tPKhl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1ub_vnum_u16u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svuint16_t test_svld1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svld1ub_vnum_u16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1ub_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_u32u10__SVBool_tPKhl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1ub_vnum_u32u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svld1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svld1ub_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1ub_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_u64u10__SVBool_tPKhl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1ub_vnum_u64u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svld1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svld1ub_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1ub_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1ub_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -308,14 +272,14 @@ svint32_t test_svld1ub_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1ub_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1ub_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1ub_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -326,14 +290,14 @@ svint64_t test_svld1ub_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1ub_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1ub_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1ub_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -344,14 +308,14 @@ svuint32_t test_svld1ub_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1ub_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1ub_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1ub_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -362,17 +326,17 @@ svuint64_t test_svld1ub_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1ub_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1ub_gather_s32offset_s32u10__SVBool_tPKhu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1ub_gather_s32offset_s32u10__SVBool_tPKhu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -380,17 +344,17 @@ svint32_t test_svld1ub_gather_s32offset_s32(svbool_t pg, const uint8_t *base, sv
    return SVE_ACLE_FUNC(svld1ub_gather_, s32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1ub_gather_s64offset_s64u10__SVBool_tPKhu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1ub_gather_s64offset_s64u10__SVBool_tPKhu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -398,17 +362,17 @@ svint64_t test_svld1ub_gather_s64offset_s64(svbool_t pg, const uint8_t *base, sv
    return SVE_ACLE_FUNC(svld1ub_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1ub_gather_s32offset_u32u10__SVBool_tPKhu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1ub_gather_s32offset_u32u10__SVBool_tPKhu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -416,17 +380,17 @@ svuint32_t test_svld1ub_gather_s32offset_u32(svbool_t pg, const uint8_t *base, s
    return SVE_ACLE_FUNC(svld1ub_gather_, s32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1ub_gather_s64offset_u64u10__SVBool_tPKhu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1ub_gather_s64offset_u64u10__SVBool_tPKhu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -434,17 +398,17 @@ svuint64_t test_svld1ub_gather_s64offset_u64(svbool_t pg, const uint8_t *base, s
    return SVE_ACLE_FUNC(svld1ub_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1ub_gather_u32offset_s32u10__SVBool_tPKhu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1ub_gather_u32offset_s32u10__SVBool_tPKhu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -452,17 +416,17 @@ svint32_t test_svld1ub_gather_u32offset_s32(svbool_t pg, const uint8_t *base, sv
    return SVE_ACLE_FUNC(svld1ub_gather_, u32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1ub_gather_u64offset_s64u10__SVBool_tPKhu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1ub_gather_u64offset_s64u10__SVBool_tPKhu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -470,17 +434,17 @@ svint64_t test_svld1ub_gather_u64offset_s64(svbool_t pg, const uint8_t *base, sv
    return SVE_ACLE_FUNC(svld1ub_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1ub_gather_u32offset_u32u10__SVBool_tPKhu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1ub_gather_u32offset_u32u10__SVBool_tPKhu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -488,17 +452,17 @@ svuint32_t test_svld1ub_gather_u32offset_u32(svbool_t pg, const uint8_t *base, s
    return SVE_ACLE_FUNC(svld1ub_gather_, u32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1ub_gather_u64offset_u64u10__SVBool_tPKhu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1ub_gather_u64offset_u64u10__SVBool_tPKhu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -506,14 +470,14 @@ svuint64_t test_svld1ub_gather_u64offset_u64(svbool_t pg, const uint8_t *base, s
    return SVE_ACLE_FUNC(svld1ub_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1ub_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1ub_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -524,14 +488,14 @@ svint32_t test_svld1ub_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1ub_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1ub_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1ub_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -542,14 +506,14 @@ svint64_t test_svld1ub_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1ub_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1ub_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1ub_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -560,14 +524,14 @@ svuint32_t test_svld1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1ub_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1ub_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1ub_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1ub_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1ub_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c

index ce62c12..d22c255 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,198 +14,174 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld1uh_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1uh_s32u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1uh_s32u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svint32_t test_svld1uh_s32(svbool_t pg, const uint16_t *base)
  {
    return svld1uh_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1uh_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1uh_s64u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1uh_s64u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svld1uh_s64(svbool_t pg, const uint16_t *base)
  {
    return svld1uh_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1uh_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1uh_u32u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1uh_u32u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svuint32_t test_svld1uh_u32(svbool_t pg, const uint16_t *base)
  {
    return svld1uh_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1uh_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1uh_u64u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1uh_u64u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svld1uh_u64(svbool_t pg, const uint16_t *base)
  {
    return svld1uh_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1uh_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svld1uh_vnum_s32u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1uh_vnum_s32u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svld1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svld1uh_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1uh_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svld1uh_vnum_s64u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1uh_vnum_s64u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svld1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svld1uh_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1uh_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svld1uh_vnum_u32u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1uh_vnum_u32u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svld1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svld1uh_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1uh_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svld1uh_vnum_u64u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1uh_vnum_u64u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svld1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svld1uh_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1uh_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1uh_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -216,14 +192,14 @@ svint32_t test_svld1uh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1uh_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1uh_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -234,14 +210,14 @@ svint64_t test_svld1uh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1uh_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1uh_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -252,14 +228,14 @@ svuint32_t test_svld1uh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1uh_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1uh_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -270,17 +246,17 @@ svuint64_t test_svld1uh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uh_gather_s32offset_s32u10__SVBool_tPKtu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uh_gather_s32offset_s32u10__SVBool_tPKtu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -288,17 +264,17 @@ svint32_t test_svld1uh_gather_s32offset_s32(svbool_t pg, const uint16_t *base, s
    return SVE_ACLE_FUNC(svld1uh_gather_, s32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uh_gather_s64offset_s64u10__SVBool_tPKtu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uh_gather_s64offset_s64u10__SVBool_tPKtu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -306,17 +282,17 @@ svint64_t test_svld1uh_gather_s64offset_s64(svbool_t pg, const uint16_t *base, s
    return SVE_ACLE_FUNC(svld1uh_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uh_gather_s32offset_u32u10__SVBool_tPKtu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uh_gather_s32offset_u32u10__SVBool_tPKtu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -324,17 +300,17 @@ svuint32_t test_svld1uh_gather_s32offset_u32(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svld1uh_gather_, s32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uh_gather_s64offset_u64u10__SVBool_tPKtu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uh_gather_s64offset_u64u10__SVBool_tPKtu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -342,17 +318,17 @@ svuint64_t test_svld1uh_gather_s64offset_u64(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svld1uh_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uh_gather_u32offset_s32u10__SVBool_tPKtu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uh_gather_u32offset_s32u10__SVBool_tPKtu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -360,17 +336,17 @@ svint32_t test_svld1uh_gather_u32offset_s32(svbool_t pg, const uint16_t *base, s
    return SVE_ACLE_FUNC(svld1uh_gather_, u32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uh_gather_u64offset_s64u10__SVBool_tPKtu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uh_gather_u64offset_s64u10__SVBool_tPKtu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -378,17 +354,17 @@ svint64_t test_svld1uh_gather_u64offset_s64(svbool_t pg, const uint16_t *base, s
    return SVE_ACLE_FUNC(svld1uh_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uh_gather_u32offset_u32u10__SVBool_tPKtu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uh_gather_u32offset_u32u10__SVBool_tPKtu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -396,17 +372,17 @@ svuint32_t test_svld1uh_gather_u32offset_u32(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svld1uh_gather_, u32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uh_gather_u64offset_u64u10__SVBool_tPKtu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uh_gather_u64offset_u64u10__SVBool_tPKtu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -414,14 +390,14 @@ svuint64_t test_svld1uh_gather_u64offset_u64(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svld1uh_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1uh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1uh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -432,14 +408,14 @@ svint32_t test_svld1uh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1uh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1uh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -450,14 +426,14 @@ svint64_t test_svld1uh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1uh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1uh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -468,14 +444,14 @@ svuint32_t test_svld1uh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1uh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1uh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -486,17 +462,17 @@ svuint64_t test_svld1uh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_s32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_s32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uh_gather_s32index_s32u10__SVBool_tPKtu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uh_gather_s32index_s32u10__SVBool_tPKtu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -504,17 +480,17 @@ svint32_t test_svld1uh_gather_s32index_s32(svbool_t pg, const uint16_t *base, sv
    return SVE_ACLE_FUNC(svld1uh_gather_, s32, index_s32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uh_gather_s64index_s64u10__SVBool_tPKtu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uh_gather_s64index_s64u10__SVBool_tPKtu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -522,17 +498,17 @@ svint64_t test_svld1uh_gather_s64index_s64(svbool_t pg, const uint16_t *base, sv
    return SVE_ACLE_FUNC(svld1uh_gather_, s64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_s32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_s32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uh_gather_s32index_u32u10__SVBool_tPKtu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uh_gather_s32index_u32u10__SVBool_tPKtu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -540,17 +516,17 @@ svuint32_t test_svld1uh_gather_s32index_u32(svbool_t pg, const uint16_t *base, s
    return SVE_ACLE_FUNC(svld1uh_gather_, s32, index_u32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uh_gather_s64index_u64u10__SVBool_tPKtu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uh_gather_s64index_u64u10__SVBool_tPKtu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -558,17 +534,17 @@ svuint64_t test_svld1uh_gather_s64index_u64(svbool_t pg, const uint16_t *base, s
    return SVE_ACLE_FUNC(svld1uh_gather_, s64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uh_gather_u32index_s32u10__SVBool_tPKtu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uh_gather_u32index_s32u10__SVBool_tPKtu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -576,17 +552,17 @@ svint32_t test_svld1uh_gather_u32index_s32(svbool_t pg, const uint16_t *base, sv
    return SVE_ACLE_FUNC(svld1uh_gather_, u32, index_s32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uh_gather_u64index_s64u10__SVBool_tPKtu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uh_gather_u64index_s64u10__SVBool_tPKtu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -594,17 +570,17 @@ svint64_t test_svld1uh_gather_u64index_s64(svbool_t pg, const uint16_t *base, sv
    return SVE_ACLE_FUNC(svld1uh_gather_, u64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uh_gather_u32index_u32u10__SVBool_tPKtu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uh_gather_u32index_u32u10__SVBool_tPKtu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -612,17 +588,17 @@ svuint32_t test_svld1uh_gather_u32index_u32(svbool_t pg, const uint16_t *base, s
    return SVE_ACLE_FUNC(svld1uh_gather_, u32, index_u32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uh_gather_u64index_u64u10__SVBool_tPKtu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uh_gather_u64index_u64u10__SVBool_tPKtu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -630,7 +606,7 @@ svuint64_t test_svld1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base, s
    return SVE_ACLE_FUNC(svld1uh_gather_, u64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32base_index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32base_index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -638,7 +614,7 @@ svuint64_t test_svld1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base, s
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1uh_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1uh_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -650,7 +626,7 @@ svint32_t test_svld1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, i
    return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _index_s32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -658,7 +634,7 @@ svint32_t test_svld1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, i
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1uh_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1uh_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -670,7 +646,7 @@ svint64_t test_svld1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
    return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u32base_index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u32base_index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -678,7 +654,7 @@ svint64_t test_svld1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1uh_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1uh_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -690,7 +666,7 @@ svuint32_t test_svld1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svld1uh_gather, _u32base, _index_u32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1uh_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uh_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -698,7 +674,7 @@ svuint32_t test_svld1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases,
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1uh_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1uh_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c

index 92b45fb..a2df984 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,106 +14,94 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld1uw_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1uw_s64u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1uw_s64u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svld1uw_s64(svbool_t pg, const uint32_t *base)
  {
    return svld1uw_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1uw_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svld1uw_u64u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svld1uw_u64u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svld1uw_u64(svbool_t pg, const uint32_t *base)
  {
    return svld1uw_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svld1uw_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1uw_vnum_s64u10__SVBool_tPKjl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1uw_vnum_s64u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svld1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return svld1uw_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1uw_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
-//
-// CPP-CHECK-LABEL: @_Z21test_svld1uw_vnum_u64u10__SVBool_tPKjl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svld1uw_vnum_u64u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svld1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return svld1uw_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1uw_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1uw_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -124,14 +112,14 @@ svint64_t test_svld1uw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svld1uw_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svld1uw_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -142,17 +130,17 @@ svuint64_t test_svld1uw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uw_gather_s64offset_s64u10__SVBool_tPKju11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uw_gather_s64offset_s64u10__SVBool_tPKju11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -160,17 +148,17 @@ svint64_t test_svld1uw_gather_s64offset_s64(svbool_t pg, const uint32_t *base, s
    return SVE_ACLE_FUNC(svld1uw_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uw_gather_s64offset_u64u10__SVBool_tPKju11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uw_gather_s64offset_u64u10__SVBool_tPKju11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -178,17 +166,17 @@ svuint64_t test_svld1uw_gather_s64offset_u64(svbool_t pg, const uint32_t *base,
    return SVE_ACLE_FUNC(svld1uw_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uw_gather_u64offset_s64u10__SVBool_tPKju12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uw_gather_u64offset_s64u10__SVBool_tPKju12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -196,17 +184,17 @@ svint64_t test_svld1uw_gather_u64offset_s64(svbool_t pg, const uint32_t *base, s
    return SVE_ACLE_FUNC(svld1uw_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svld1uw_gather_u64offset_u64u10__SVBool_tPKju12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svld1uw_gather_u64offset_u64u10__SVBool_tPKju12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -214,14 +202,14 @@ svuint64_t test_svld1uw_gather_u64offset_u64(svbool_t pg, const uint32_t *base,
    return SVE_ACLE_FUNC(svld1uw_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1uw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1uw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -232,14 +220,14 @@ svint64_t test_svld1uw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svld1uw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svld1uw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -250,17 +238,17 @@ svuint64_t test_svld1uw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uw_gather_s64index_s64u10__SVBool_tPKju11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uw_gather_s64index_s64u10__SVBool_tPKju11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -268,17 +256,17 @@ svint64_t test_svld1uw_gather_s64index_s64(svbool_t pg, const uint32_t *base, sv
    return SVE_ACLE_FUNC(svld1uw_gather_, s64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uw_gather_s64index_u64u10__SVBool_tPKju11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uw_gather_s64index_u64u10__SVBool_tPKju11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -286,17 +274,17 @@ svuint64_t test_svld1uw_gather_s64index_u64(svbool_t pg, const uint32_t *base, s
    return SVE_ACLE_FUNC(svld1uw_gather_, s64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uw_gather_u64index_s64u10__SVBool_tPKju12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uw_gather_u64index_s64u10__SVBool_tPKju12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -304,17 +292,17 @@ svint64_t test_svld1uw_gather_u64index_s64(svbool_t pg, const uint32_t *base, sv
    return SVE_ACLE_FUNC(svld1uw_gather_, u64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svld1uw_gather_u64index_u64u10__SVBool_tPKju12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svld1uw_gather_u64index_u64u10__SVBool_tPKju12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -322,7 +310,7 @@ svuint64_t test_svld1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base, s
    return SVE_ACLE_FUNC(svld1uw_gather_, u64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -330,7 +318,7 @@ svuint64_t test_svld1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base, s
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1uw_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1uw_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -342,7 +330,7 @@ svint64_t test_svld1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
    return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svld1uw_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld1uw_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -350,7 +338,7 @@ svint64_t test_svld1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svld1uw_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svld1uw_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c

index 6f3224b..57d153e 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,20 +15,20 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld2_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
  // CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svld2_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svld2_bf16u10__SVBool_tPKu6__bf16(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
@@ -41,29 +41,27 @@ svbfloat16x2_t test_svld2_bf16(svbool_t pg, const bfloat16_t *base)
  }
  
  
-// CHECK-LABEL: @test_svld2_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP7]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP6]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svld2_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svld2_vnum_bf16u10__SVBool_tPKu6__bf16l(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP6]]
  //
  svbfloat16x2_t test_svld2_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c

index 157488e..997a481 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,18 +15,18 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld2_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
  // CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
  //
-// CPP-CHECK-LABEL: @_Z13test_svld2_s8u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svld2_s8u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -38,20 +38,20 @@ svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base)
    return SVE_ACLE_FUNC(svld2,_s8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
  // CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld2_s16u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld2_s16u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -63,20 +63,20 @@ svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base)
    return SVE_ACLE_FUNC(svld2,_s16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
  // CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld2_s32u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld2_s32u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -88,20 +88,20 @@ svint32x2_t test_svld2_s32(svbool_t pg, const int32_t *base)
    return SVE_ACLE_FUNC(svld2,_s32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
  // CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld2_s64u10__SVBool_tPKl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld2_s64u10__SVBool_tPKl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -113,18 +113,18 @@ svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base)
    return SVE_ACLE_FUNC(svld2,_s64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
  // CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
  //
-// CPP-CHECK-LABEL: @_Z13test_svld2_u8u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svld2_u8u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -136,20 +136,20 @@ svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base)
    return SVE_ACLE_FUNC(svld2,_u8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
  // CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld2_u16u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld2_u16u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -161,20 +161,20 @@ svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base)
    return SVE_ACLE_FUNC(svld2,_u16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
  // CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld2_u32u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld2_u32u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -186,20 +186,20 @@ svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base)
    return SVE_ACLE_FUNC(svld2,_u32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
  // CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld2_u64u10__SVBool_tPKm(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld2_u64u10__SVBool_tPKm(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -211,20 +211,20 @@ svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base)
    return SVE_ACLE_FUNC(svld2,_u64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
  // CHECK-NEXT:    ret <vscale x 16 x half> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld2_f16u10__SVBool_tPKDh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld2_f16u10__SVBool_tPKDh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
@@ -236,20 +236,20 @@ svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base)
    return SVE_ACLE_FUNC(svld2,_f16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
  // CHECK-NEXT:    ret <vscale x 8 x float> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld2_f32u10__SVBool_tPKf(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld2_f32u10__SVBool_tPKf(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
@@ -261,20 +261,20 @@ svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base)
    return SVE_ACLE_FUNC(svld2,_f32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
  // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
  // CHECK-NEXT:    ret <vscale x 4 x double> [[TMP5]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld2_f64u10__SVBool_tPKd(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld2_f64u10__SVBool_tPKd(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
@@ -286,315 +286,293 @@ svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base)
    return SVE_ACLE_FUNC(svld2,_f64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP6]]
-//
-// CPP-CHECK-LABEL: @_Z18test_svld2_vnum_s8u10__SVBool_tPKal(
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svld2_vnum_s8u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP6]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
  //
  svint8x2_t test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_s8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP7]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s16u10__SVBool_tPKsl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld2_vnum_s16u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
  //
  svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_s16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP7]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s32u10__SVBool_tPKil(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld2_vnum_s32u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
  //
  svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_s32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP7]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s64u10__SVBool_tPKll(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld2_vnum_s64u10__SVBool_tPKll(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP6]]
  //
  svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_s64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP6]]
-//
-// CPP-CHECK-LABEL: @_Z18test_svld2_vnum_u8u10__SVBool_tPKhl(
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svld2_vnum_u8u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP6]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
  //
  svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_u8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP7]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u16u10__SVBool_tPKtl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld2_vnum_u16u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
  //
  svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_u16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP7]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u32u10__SVBool_tPKjl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld2_vnum_u32u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
  //
  svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_u32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP7]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u64u10__SVBool_tPKml(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld2_vnum_u64u10__SVBool_tPKml(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP6]]
  //
  svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_u64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x half> [[TMP7]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f16u10__SVBool_tPKDhl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x half> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld2_vnum_f16u10__SVBool_tPKDhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP6]]
  //
  svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_f16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP7]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f32u10__SVBool_tPKfl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld2_vnum_f32u10__SVBool_tPKfl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP6]]
  //
  svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld2_vnum,_f32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld2_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld2_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 2)
-// CHECK-NEXT:    ret <vscale x 4 x double> [[TMP7]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f64u10__SVBool_tPKdl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x double> [[TMP6]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld2_vnum_f64u10__SVBool_tPKdl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP7]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP6]]
  //
  svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c

index 5e78d05..0094ca7 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,10 +15,10 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld3_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
@@ -27,10 +27,10 @@
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 16)
  // CHECK-NEXT:    ret <vscale x 24 x bfloat> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svld3_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svld3_bf16u10__SVBool_tPKu6__bf16(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
@@ -44,33 +44,31 @@ svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base)
    return SVE_ACLE_FUNC(svld3,_bf16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 8)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], i64 16)
-// CHECK-NEXT:    ret <vscale x 24 x bfloat> [[TMP9]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 16)
+// CHECK-NEXT:    ret <vscale x 24 x bfloat> [[TMP8]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svld3_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svld3_vnum_bf16u10__SVBool_tPKu6__bf16l(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 24 x bfloat> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 24 x bfloat> [[TMP8]]
  //
  svbfloat16x3_t test_svld3_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c

index 54ae53c..fd0cc44 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,9 +15,9 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld3_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -26,9 +26,9 @@
  // CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
  // CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP6]]
  //
-// CPP-CHECK-LABEL: @_Z13test_svld3_s8u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svld3_s8u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -42,10 +42,10 @@ svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base)
    return SVE_ACLE_FUNC(svld3,_s8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -54,10 +54,10 @@ svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base)
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
  // CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld3_s16u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld3_s16u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -71,10 +71,10 @@ svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base)
    return SVE_ACLE_FUNC(svld3,_s16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -83,10 +83,10 @@ svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base)
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
  // CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld3_s32u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld3_s32u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -100,10 +100,10 @@ svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base)
    return SVE_ACLE_FUNC(svld3,_s32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -112,10 +112,10 @@ svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base)
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
  // CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld3_s64u10__SVBool_tPKl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld3_s64u10__SVBool_tPKl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -129,9 +129,9 @@ svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base)
    return SVE_ACLE_FUNC(svld3,_s64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -140,9 +140,9 @@ svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base)
  // CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
  // CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP6]]
  //
-// CPP-CHECK-LABEL: @_Z13test_svld3_u8u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svld3_u8u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -156,10 +156,10 @@ svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base)
    return SVE_ACLE_FUNC(svld3,_u8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -168,10 +168,10 @@ svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base)
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
  // CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld3_u16u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld3_u16u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -185,10 +185,10 @@ svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base)
    return SVE_ACLE_FUNC(svld3,_u16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -197,10 +197,10 @@ svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base)
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
  // CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld3_u32u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld3_u32u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -214,10 +214,10 @@ svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base)
    return SVE_ACLE_FUNC(svld3,_u32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -226,10 +226,10 @@ svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base)
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
  // CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld3_u64u10__SVBool_tPKm(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld3_u64u10__SVBool_tPKm(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -243,10 +243,10 @@ svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base)
    return SVE_ACLE_FUNC(svld3,_u64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
@@ -255,10 +255,10 @@ svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base)
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
  // CHECK-NEXT:    ret <vscale x 24 x half> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld3_f16u10__SVBool_tPKDh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld3_f16u10__SVBool_tPKDh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
@@ -272,10 +272,10 @@ svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base)
    return SVE_ACLE_FUNC(svld3,_f16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
@@ -284,10 +284,10 @@ svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base)
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
  // CHECK-NEXT:    ret <vscale x 12 x float> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld3_f32u10__SVBool_tPKf(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld3_f32u10__SVBool_tPKf(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
@@ -301,10 +301,10 @@ svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base)
    return SVE_ACLE_FUNC(svld3,_f32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
@@ -313,10 +313,10 @@ svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base)
  // CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
  // CHECK-NEXT:    ret <vscale x 6 x double> [[TMP7]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld3_f64u10__SVBool_tPKd(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld3_f64u10__SVBool_tPKd(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
@@ -330,359 +330,337 @@ svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base)
    return SVE_ACLE_FUNC(svld3,_f64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
-// CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP8]]
-//
-// CPP-CHECK-LABEL: @_Z18test_svld3_vnum_s8u10__SVBool_tPKal(
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP7]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svld3_vnum_s8u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
-// CPP-CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP8]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CPP-CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP7]]
  //
  svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_s8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 16)
-// CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP9]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s16u10__SVBool_tPKsl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
+// CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP8]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld3_vnum_s16u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP8]]
  //
  svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_s16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 8)
-// CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP9]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s32u10__SVBool_tPKil(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
+// CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP8]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld3_vnum_s32u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP8]]
  //
  svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_s32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 4)
-// CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP9]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s64u10__SVBool_tPKll(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
+// CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP8]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld3_vnum_s64u10__SVBool_tPKll(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP8]]
  //
  svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_s64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
-// CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP8]]
-//
-// CPP-CHECK-LABEL: @_Z18test_svld3_vnum_u8u10__SVBool_tPKhl(
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP7]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svld3_vnum_u8u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
-// CPP-CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP8]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CPP-CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP7]]
  //
  svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_u8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 16)
-// CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP9]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u16u10__SVBool_tPKtl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
+// CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP8]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld3_vnum_u16u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP8]]
  //
  svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_u16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 8)
-// CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP9]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u32u10__SVBool_tPKjl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
+// CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP8]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld3_vnum_u32u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP8]]
  //
  svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_u32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 4)
-// CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP9]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u64u10__SVBool_tPKml(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
+// CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP8]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld3_vnum_u64u10__SVBool_tPKml(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP8]]
  //
  svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_u64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 8)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 16)
-// CHECK-NEXT:    ret <vscale x 24 x half> [[TMP9]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f16u10__SVBool_tPKDhl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 16)
+// CHECK-NEXT:    ret <vscale x 24 x half> [[TMP8]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld3_vnum_f16u10__SVBool_tPKDhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 24 x half> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 24 x half> [[TMP8]]
  //
  svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_f16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 4)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 8)
-// CHECK-NEXT:    ret <vscale x 12 x float> [[TMP9]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f32u10__SVBool_tPKfl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 8)
+// CHECK-NEXT:    ret <vscale x 12 x float> [[TMP8]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld3_vnum_f32u10__SVBool_tPKfl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 12 x float> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 12 x float> [[TMP8]]
  //
  svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld3_vnum,_f32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld3_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld3_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 2)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 4)
-// CHECK-NEXT:    ret <vscale x 6 x double> [[TMP9]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f64u10__SVBool_tPKdl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 2)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 4)
+// CHECK-NEXT:    ret <vscale x 6 x double> [[TMP8]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld3_vnum_f64u10__SVBool_tPKdl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 6 x double> [[TMP9]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 6 x double> [[TMP8]]
  //
  svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c

index 52f74ae..1b6fdda 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,10 +15,10 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld4_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
@@ -29,10 +29,10 @@
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], i64 24)
  // CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svld4_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svld4_bf16u10__SVBool_tPKu6__bf16(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
@@ -48,37 +48,35 @@ svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base)
    return SVE_ACLE_FUNC(svld4,_bf16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 8)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], i64 16)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP9]], <vscale x 8 x bfloat> [[TMP10]], i64 24)
-// CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP11]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 16)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP10]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svld4_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svld4_vnum_bf16u10__SVBool_tPKu6__bf16l(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP9]], <vscale x 8 x bfloat> [[TMP10]], i64 24)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP10]]
  //
  svbfloat16x4_t test_svld4_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c

index a9f7638..6f1ff59 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,9 +15,9 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svld4_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -28,9 +28,9 @@
  // CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
  // CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
  //
-// CPP-CHECK-LABEL: @_Z13test_svld4_s8u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svld4_s8u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -46,10 +46,10 @@ svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base)
    return SVE_ACLE_FUNC(svld4,_s8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -60,10 +60,10 @@ svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base)
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
  // CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld4_s16u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld4_s16u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -79,10 +79,10 @@ svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base)
    return SVE_ACLE_FUNC(svld4,_s16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -93,10 +93,10 @@ svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base)
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
  // CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld4_s32u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld4_s32u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -112,10 +112,10 @@ svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base)
    return SVE_ACLE_FUNC(svld4,_s32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -126,10 +126,10 @@ svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base)
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
  // CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld4_s64u10__SVBool_tPKl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld4_s64u10__SVBool_tPKl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -145,9 +145,9 @@ svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base)
    return SVE_ACLE_FUNC(svld4,_s64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -158,9 +158,9 @@ svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base)
  // CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
  // CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
  //
-// CPP-CHECK-LABEL: @_Z13test_svld4_u8u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svld4_u8u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
@@ -176,10 +176,10 @@ svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base)
    return SVE_ACLE_FUNC(svld4,_u8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -190,10 +190,10 @@ svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base)
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
  // CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld4_u16u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld4_u16u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
@@ -209,10 +209,10 @@ svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base)
    return SVE_ACLE_FUNC(svld4,_u16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -223,10 +223,10 @@ svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base)
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
  // CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld4_u32u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld4_u32u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
@@ -242,10 +242,10 @@ svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base)
    return SVE_ACLE_FUNC(svld4,_u32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -256,10 +256,10 @@ svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base)
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
  // CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld4_u64u10__SVBool_tPKm(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld4_u64u10__SVBool_tPKm(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
@@ -275,10 +275,10 @@ svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base)
    return SVE_ACLE_FUNC(svld4,_u64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
@@ -289,10 +289,10 @@ svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base)
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 24)
  // CHECK-NEXT:    ret <vscale x 32 x half> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld4_f16u10__SVBool_tPKDh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld4_f16u10__SVBool_tPKDh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
@@ -308,10 +308,10 @@ svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base)
    return SVE_ACLE_FUNC(svld4,_f16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
@@ -322,10 +322,10 @@ svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base)
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 12)
  // CHECK-NEXT:    ret <vscale x 16 x float> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld4_f32u10__SVBool_tPKf(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld4_f32u10__SVBool_tPKf(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
@@ -341,10 +341,10 @@ svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base)
    return SVE_ACLE_FUNC(svld4,_f32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
  // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
@@ -355,10 +355,10 @@ svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base)
  // CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 6)
  // CHECK-NEXT:    ret <vscale x 8 x double> [[TMP9]]
  //
-// CPP-CHECK-LABEL: @_Z14test_svld4_f64u10__SVBool_tPKd(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svld4_f64u10__SVBool_tPKd(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
@@ -374,403 +374,381 @@ svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base)
    return SVE_ACLE_FUNC(svld4,_f64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
-// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 3
-// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i64 48)
-// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP10]]
-//
-// CPP-CHECK-LABEL: @_Z18test_svld4_vnum_s8u10__SVBool_tPKal(
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svld4_vnum_s8u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 3
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i64 48)
-// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP10]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
  //
  svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_s8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 16)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP9]], <vscale x 8 x i16> [[TMP10]], i64 24)
-// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP11]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s16u10__SVBool_tPKsl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP10]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld4_vnum_s16u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP9]], <vscale x 8 x i16> [[TMP10]], i64 24)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP10]]
  //
  svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_s16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 8)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP9]], <vscale x 4 x i32> [[TMP10]], i64 12)
-// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP11]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s32u10__SVBool_tPKil(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP10]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld4_vnum_s32u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP9]], <vscale x 4 x i32> [[TMP10]], i64 12)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP10]]
  //
  svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_s32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 4)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP10]], i64 6)
-// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP11]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s64u10__SVBool_tPKll(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP10]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld4_vnum_s64u10__SVBool_tPKll(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP10]], i64 6)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP10]]
  //
  svint64x4_t test_svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_s64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
-// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 3
-// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i64 48)
-// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP10]]
-//
-// CPP-CHECK-LABEL: @_Z18test_svld4_vnum_u8u10__SVBool_tPKhl(
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svld4_vnum_u8u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 3
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i64 48)
-// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP10]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
  //
  svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_u8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 16)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP9]], <vscale x 8 x i16> [[TMP10]], i64 24)
-// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP11]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u16u10__SVBool_tPKtl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP10]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld4_vnum_u16u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP9]], <vscale x 8 x i16> [[TMP10]], i64 24)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP10]]
  //
  svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_u16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 8)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP9]], <vscale x 4 x i32> [[TMP10]], i64 12)
-// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP11]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u32u10__SVBool_tPKjl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP10]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld4_vnum_u32u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP9]], <vscale x 4 x i32> [[TMP10]], i64 12)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP10]]
  //
  svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_u32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 4)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP10]], i64 6)
-// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP11]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u64u10__SVBool_tPKml(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP10]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld4_vnum_u64u10__SVBool_tPKml(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP10]], i64 6)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP10]]
  //
  svuint64x4_t test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_u64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 8)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 16)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP9]], <vscale x 8 x half> [[TMP10]], i64 24)
-// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP11]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f16u10__SVBool_tPKDhl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 16)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP8]], <vscale x 8 x half> [[TMP9]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP10]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld4_vnum_f16u10__SVBool_tPKDhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP9]], <vscale x 8 x half> [[TMP10]], i64 24)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP8]], <vscale x 8 x half> [[TMP9]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP10]]
  //
  svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_f16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 4)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 8)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP9]], <vscale x 4 x float> [[TMP10]], i64 12)
-// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP11]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f32u10__SVBool_tPKfl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 8)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP8]], <vscale x 4 x float> [[TMP9]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP10]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld4_vnum_f32u10__SVBool_tPKfl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP9]], <vscale x 4 x float> [[TMP10]], i64 12)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP8]], <vscale x 4 x float> [[TMP9]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP10]]
  //
  svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svld4_vnum,_f32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svld4_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svld4_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0)
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 2)
-// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 2
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 4)
-// CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 3
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP9]], <vscale x 2 x double> [[TMP10]], i64 6)
-// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP11]]
-//
-// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f64u10__SVBool_tPKdl(
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 2)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 4)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP10]]
+//
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svld4_vnum_f64u10__SVBool_tPKdl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 2
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 3
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP9]], <vscale x 2 x double> [[TMP10]], i64 6)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP11]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 3
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP10]]
  //
  svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1-bfloat.c

index f56c18c..a50d0ce 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,16 +15,16 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldff1_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z17test_svldff1_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z17test_svldff1_bf16u10__SVBool_tPKu6__bf16(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
  //
  svbfloat16_t test_svldff1_bf16(svbool_t pg, const bfloat16_t *base)
@@ -32,21 +32,19 @@ svbfloat16_t test_svldff1_bf16(svbool_t pg, const bfloat16_t *base)
    return SVE_ACLE_FUNC(svldff1,_bf16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z22test_svldff1_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z22test_svldff1_vnum_bf16u10__SVBool_tPKu6__bf16l(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
  //
  svbfloat16_t test_svldff1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c

index fe29900..6fbb259 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,14 +14,14 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldff1_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svldff1_s8u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svldff1_s8u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
  svint8_t test_svldff1_s8(svbool_t pg, const int8_t *base)
@@ -29,16 +29,16 @@ svint8_t test_svldff1_s8(svbool_t pg, const int8_t *base)
    return SVE_ACLE_FUNC(svldff1,_s8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldff1_s16u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldff1_s16u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
  svint16_t test_svldff1_s16(svbool_t pg, const int16_t *base)
@@ -46,16 +46,16 @@ svint16_t test_svldff1_s16(svbool_t pg, const int16_t *base)
    return SVE_ACLE_FUNC(svldff1,_s16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldff1_s32u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldff1_s32u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svldff1_s32(svbool_t pg, const int32_t *base)
@@ -63,16 +63,16 @@ svint32_t test_svldff1_s32(svbool_t pg, const int32_t *base)
    return SVE_ACLE_FUNC(svldff1,_s32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldff1_s64u10__SVBool_tPKl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldff1_s64u10__SVBool_tPKl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svldff1_s64(svbool_t pg, const int64_t *base)
@@ -80,14 +80,14 @@ svint64_t test_svldff1_s64(svbool_t pg, const int64_t *base)
    return SVE_ACLE_FUNC(svldff1,_s64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svldff1_u8u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svldff1_u8u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
  svuint8_t test_svldff1_u8(svbool_t pg, const uint8_t *base)
@@ -95,16 +95,16 @@ svuint8_t test_svldff1_u8(svbool_t pg, const uint8_t *base)
    return SVE_ACLE_FUNC(svldff1,_u8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldff1_u16u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldff1_u16u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
  svuint16_t test_svldff1_u16(svbool_t pg, const uint16_t *base)
@@ -112,16 +112,16 @@ svuint16_t test_svldff1_u16(svbool_t pg, const uint16_t *base)
    return SVE_ACLE_FUNC(svldff1,_u16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldff1_u32u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldff1_u32u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svldff1_u32(svbool_t pg, const uint32_t *base)
@@ -129,16 +129,16 @@ svuint32_t test_svldff1_u32(svbool_t pg, const uint32_t *base)
    return SVE_ACLE_FUNC(svldff1,_u32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldff1_u64u10__SVBool_tPKm(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldff1_u64u10__SVBool_tPKm(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svldff1_u64(svbool_t pg, const uint64_t *base)
@@ -146,16 +146,16 @@ svuint64_t test_svldff1_u64(svbool_t pg, const uint64_t *base)
    return SVE_ACLE_FUNC(svldff1,_u64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldff1_f16u10__SVBool_tPKDh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldff1_f16u10__SVBool_tPKDh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
  //
  svfloat16_t test_svldff1_f16(svbool_t pg, const float16_t *base)
@@ -163,16 +163,16 @@ svfloat16_t test_svldff1_f16(svbool_t pg, const float16_t *base)
    return SVE_ACLE_FUNC(svldff1,_f16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldff1_f32u10__SVBool_tPKf(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldff1_f32u10__SVBool_tPKf(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svldff1_f32(svbool_t pg, const float32_t *base)
@@ -180,16 +180,16 @@ svfloat32_t test_svldff1_f32(svbool_t pg, const float32_t *base)
    return SVE_ACLE_FUNC(svldff1,_f32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldff1_f64u10__SVBool_tPKd(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldff1_f64u10__SVBool_tPKd(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svldff1_f64(svbool_t pg, const float64_t *base)
@@ -197,240 +197,218 @@ svfloat64_t test_svldff1_f64(svbool_t pg, const float64_t *base)
    return SVE_ACLE_FUNC(svldff1,_f64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svldff1_vnum_s8u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svldff1_vnum_s8u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
  svint8_t test_svldff1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_s8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_s16u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldff1_vnum_s16u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svint16_t test_svldff1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_s16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_s32u10__SVBool_tPKil(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldff1_vnum_s32u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svint32_t test_svldff1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_s32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_s64u10__SVBool_tPKll(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldff1_vnum_s64u10__SVBool_tPKll(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svldff1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_s64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svldff1_vnum_u8u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svldff1_vnum_u8u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
  svuint8_t test_svldff1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_u8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_u16u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldff1_vnum_u16u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svuint16_t test_svldff1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_u16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_u32u10__SVBool_tPKjl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldff1_vnum_u32u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svuint32_t test_svldff1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_u32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_u64u10__SVBool_tPKml(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldff1_vnum_u64u10__SVBool_tPKml(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svldff1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_u64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_f16u10__SVBool_tPKDhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldff1_vnum_f16u10__SVBool_tPKDhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
  //
  svfloat16_t test_svldff1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_f16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_f32u10__SVBool_tPKfl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldff1_vnum_f32u10__SVBool_tPKfl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
  svfloat32_t test_svldff1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_f32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_f64u10__SVBool_tPKdl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldff1_vnum_f64u10__SVBool_tPKdl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
  svfloat64_t test_svldff1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldff1_vnum,_f64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svldff1_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svldff1_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -440,13 +418,13 @@ svint32_t test_svldff1_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svldff1_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svldff1_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -456,13 +434,13 @@ svint64_t test_svldff1_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svldff1_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svldff1_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -472,13 +450,13 @@ svuint32_t test_svldff1_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svldff1_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svldff1_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -488,13 +466,13 @@ svuint64_t test_svldff1_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32base_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32base_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svldff1_gather_u32base_f32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svldff1_gather_u32base_f32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -504,13 +482,13 @@ svfloat32_t test_svldff1_gather_u32base_f32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1_gather, _u32base, _f32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64base_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64base_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z31test_svldff1_gather_u64base_f64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svldff1_gather_u64base_f64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -520,205 +498,205 @@ svfloat64_t test_svldff1_gather_u64base_f64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1_gather, _u64base, _f64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_s32offset_s32u10__SVBool_tPKiu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_s32offset_s32u10__SVBool_tPKiu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svldff1_gather_s32offset_s32(svbool_t pg, const int32_t *base, svint32_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, s32, offset, _s32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_s64offset_s64u10__SVBool_tPKlu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_s64offset_s64u10__SVBool_tPKlu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svldff1_gather_s64offset_s64(svbool_t pg, const int64_t *base, svint64_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, s64, offset, _s64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_s32offset_u32u10__SVBool_tPKju11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_s32offset_u32u10__SVBool_tPKju11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svldff1_gather_s32offset_u32(svbool_t pg, const uint32_t *base, svint32_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, s32, offset, _u32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_s64offset_u64u10__SVBool_tPKmu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_s64offset_u64u10__SVBool_tPKmu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svldff1_gather_s64offset_u64(svbool_t pg, const uint64_t *base, svint64_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, s64, offset, _u64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s32offset_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s32offset_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_s32offset_f32u10__SVBool_tPKfu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_s32offset_f32u10__SVBool_tPKfu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svldff1_gather_s32offset_f32(svbool_t pg, const float32_t *base, svint32_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, s32, offset, _f32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s64offset_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s64offset_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_s64offset_f64u10__SVBool_tPKdu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_s64offset_f64u10__SVBool_tPKdu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svldff1_gather_s64offset_f64(svbool_t pg, const float64_t *base, svint64_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, s64, offset, _f64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_u32offset_s32u10__SVBool_tPKiu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_u32offset_s32u10__SVBool_tPKiu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svldff1_gather_u32offset_s32(svbool_t pg, const int32_t *base, svuint32_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, u32, offset, _s32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_u64offset_s64u10__SVBool_tPKlu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_u64offset_s64u10__SVBool_tPKlu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svldff1_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, u64, offset, _s64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_u32offset_u32u10__SVBool_tPKju12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_u32offset_u32u10__SVBool_tPKju12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svldff1_gather_u32offset_u32(svbool_t pg, const uint32_t *base, svuint32_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, u32, offset, _u32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_u64offset_u64u10__SVBool_tPKmu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_u64offset_u64u10__SVBool_tPKmu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svldff1_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, u64, offset, _u64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32offset_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32offset_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_u32offset_f32u10__SVBool_tPKfu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_u32offset_f32u10__SVBool_tPKfu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svldff1_gather_u32offset_f32(svbool_t pg, const float32_t *base, svuint32_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, u32, offset, _f32)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64offset_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64offset_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1_gather_u64offset_f64u10__SVBool_tPKdu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1_gather_u64offset_f64u10__SVBool_tPKdu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svldff1_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t offsets) {
    return SVE_ACLE_FUNC(svldff1_gather_, u64, offset, _f64)(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svldff1_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svldff1_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -728,13 +706,13 @@ svint32_t test_svldff1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svldff1_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svldff1_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svldff1_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -744,13 +722,13 @@ svint64_t test_svldff1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svldff1_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svldff1_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svldff1_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -760,13 +738,13 @@ svuint32_t test_svldff1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svldff1_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svldff1_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svldff1_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -776,13 +754,13 @@ svuint64_t test_svldff1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svldff1_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32base_offset_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32base_offset_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svldff1_gather_u32base_offset_f32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svldff1_gather_u32base_offset_f32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -792,13 +770,13 @@ svfloat32_t test_svldff1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases
    return SVE_ACLE_FUNC(svldff1_gather, _u32base, _offset_f32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64base_offset_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64base_offset_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z38test_svldff1_gather_u64base_offset_f64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z38test_svldff1_gather_u64base_offset_f64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -808,206 +786,206 @@ svfloat64_t test_svldff1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases
    return SVE_ACLE_FUNC(svldff1_gather, _u64base, _offset_f64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_s32index_s32u10__SVBool_tPKiu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_s32index_s32u10__SVBool_tPKiu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svldff1_gather_s32index_s32(svbool_t pg, const int32_t *base, svint32_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, s32, index, _s32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_s64index_s64u10__SVBool_tPKlu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_s64index_s64u10__SVBool_tPKlu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svldff1_gather_s64index_s64(svbool_t pg, const int64_t *base, svint64_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, s64, index, _s64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_s32index_u32u10__SVBool_tPKju11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_s32index_u32u10__SVBool_tPKju11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svldff1_gather_s32index_u32(svbool_t pg, const uint32_t *base, svint32_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, s32, index, _u32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_s64index_u64u10__SVBool_tPKmu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_s64index_u64u10__SVBool_tPKmu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svldff1_gather_s64index_u64(svbool_t pg, const uint64_t *base, svint64_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, s64, index, _u64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s32index_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s32index_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_s32index_f32u10__SVBool_tPKfu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_s32index_f32u10__SVBool_tPKfu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svldff1_gather_s32index_f32(svbool_t pg, const float32_t *base, svint32_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, s32, index, _f32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_s64index_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_s64index_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_s64index_f64u10__SVBool_tPKdu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_s64index_f64u10__SVBool_tPKdu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svldff1_gather_s64index_f64(svbool_t pg, const float64_t *base, svint64_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, s64, index, _f64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_u32index_s32u10__SVBool_tPKiu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_u32index_s32u10__SVBool_tPKiu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svldff1_gather_u32index_s32(svbool_t pg, const int32_t *base, svuint32_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, u32, index, _s32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_u64index_s64u10__SVBool_tPKlu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_u64index_s64u10__SVBool_tPKlu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svldff1_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, u64, index, _s64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_u32index_u32u10__SVBool_tPKju12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_u32index_u32u10__SVBool_tPKju12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svldff1_gather_u32index_u32(svbool_t pg, const uint32_t *base, svuint32_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, u32, index, _u32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_u64index_u64u10__SVBool_tPKmu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_u64index_u64u10__SVBool_tPKmu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svldff1_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, u64, index, _u64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32index_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32index_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_u32index_f32u10__SVBool_tPKfu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_u32index_f32u10__SVBool_tPKfu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svldff1_gather_u32index_f32(svbool_t pg, const float32_t *base, svuint32_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, u32, index, _f32)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64index_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64index_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z32test_svldff1_gather_u64index_f64u10__SVBool_tPKdu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svldff1_gather_u64index_f64u10__SVBool_tPKdu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svldff1_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t indices) {
    return SVE_ACLE_FUNC(svldff1_gather_, u64, index, _f64)(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32base_index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32base_index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svldff1_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svldff1_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -1018,14 +996,14 @@ svint32_t test_svldff1_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, i
    return SVE_ACLE_FUNC(svldff1_gather, _u32base, _index_s32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svldff1_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svldff1_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
@@ -1036,14 +1014,14 @@ svint64_t test_svldff1_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
    return SVE_ACLE_FUNC(svldff1_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32base_index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32base_index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svldff1_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svldff1_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -1054,14 +1032,14 @@ svuint32_t test_svldff1_gather_u32base_index_u32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svldff1_gather, _u32base, _index_u32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svldff1_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svldff1_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
@@ -1072,14 +1050,14 @@ svuint64_t test_svldff1_gather_u64base_index_u64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svldff1_gather, _u64base, _index_u64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u32base_index_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u32base_index_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svldff1_gather_u32base_index_f32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svldff1_gather_u32base_index_f32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -1090,14 +1068,14 @@ svfloat32_t test_svldff1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svldff1_gather, _u32base, _index_f32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1_gather_u64base_index_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1_gather_u64base_index_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z37test_svldff1_gather_u64base_index_f64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svldff1_gather_u64base_index_f64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c

index 95f77a4..252fe90 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,17 +14,17 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldff1sb_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sb_s16u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sb_s16u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
@@ -33,17 +33,17 @@ svint16_t test_svldff1sb_s16(svbool_t pg, const int8_t *base)
    return svldff1sb_s16(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sb_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sb_s32u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sb_s32u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -52,17 +52,17 @@ svint32_t test_svldff1sb_s32(svbool_t pg, const int8_t *base)
    return svldff1sb_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sb_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sb_s64u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sb_s64u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -71,17 +71,17 @@ svint64_t test_svldff1sb_s64(svbool_t pg, const int8_t *base)
    return svldff1sb_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sb_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sb_u16u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sb_u16u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
@@ -90,17 +90,17 @@ svuint16_t test_svldff1sb_u16(svbool_t pg, const int8_t *base)
    return svldff1sb_u16(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sb_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sb_u32u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sb_u32u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -109,17 +109,17 @@ svuint32_t test_svldff1sb_u32(svbool_t pg, const int8_t *base)
    return svldff1sb_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sb_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sb_u64u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sb_u64u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -128,152 +128,140 @@ svuint64_t test_svldff1sb_u64(svbool_t pg, const int8_t *base)
    return svldff1sb_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sb_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sb_vnum_s16u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sb_vnum_s16u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svint16_t test_svldff1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldff1sb_vnum_s16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sb_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sb_vnum_s32u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sb_vnum_s32u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svldff1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldff1sb_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sb_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sb_vnum_s64u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sb_vnum_s64u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldff1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldff1sb_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sb_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sb_vnum_u16u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sb_vnum_u16u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svuint16_t test_svldff1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldff1sb_vnum_u16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sb_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sb_vnum_u32u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sb_vnum_u32u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svldff1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldff1sb_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sb_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sb_vnum_u64u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sb_vnum_u64u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldff1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldff1sb_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sb_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sb_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -284,14 +272,14 @@ svint32_t test_svldff1sb_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1sb_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sb_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sb_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -302,14 +290,14 @@ svint64_t test_svldff1sb_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1sb_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sb_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sb_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -320,14 +308,14 @@ svuint32_t test_svldff1sb_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1sb_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sb_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sb_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -338,17 +326,17 @@ svuint64_t test_svldff1sb_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1sb_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sb_gather_s32offset_s32u10__SVBool_tPKau11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sb_gather_s32offset_s32u10__SVBool_tPKau11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -356,17 +344,17 @@ svint32_t test_svldff1sb_gather_s32offset_s32(svbool_t pg, const int8_t *base, s
    return SVE_ACLE_FUNC(svldff1sb_gather_, s32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sb_gather_s64offset_s64u10__SVBool_tPKau11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sb_gather_s64offset_s64u10__SVBool_tPKau11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -374,17 +362,17 @@ svint64_t test_svldff1sb_gather_s64offset_s64(svbool_t pg, const int8_t *base, s
    return SVE_ACLE_FUNC(svldff1sb_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sb_gather_s32offset_u32u10__SVBool_tPKau11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sb_gather_s32offset_u32u10__SVBool_tPKau11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -392,17 +380,17 @@ svuint32_t test_svldff1sb_gather_s32offset_u32(svbool_t pg, const int8_t *base,
    return SVE_ACLE_FUNC(svldff1sb_gather_, s32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sb_gather_s64offset_u64u10__SVBool_tPKau11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sb_gather_s64offset_u64u10__SVBool_tPKau11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -410,17 +398,17 @@ svuint64_t test_svldff1sb_gather_s64offset_u64(svbool_t pg, const int8_t *base,
    return SVE_ACLE_FUNC(svldff1sb_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sb_gather_u32offset_s32u10__SVBool_tPKau12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sb_gather_u32offset_s32u10__SVBool_tPKau12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -428,17 +416,17 @@ svint32_t test_svldff1sb_gather_u32offset_s32(svbool_t pg, const int8_t *base, s
    return SVE_ACLE_FUNC(svldff1sb_gather_, u32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sb_gather_u64offset_s64u10__SVBool_tPKau12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sb_gather_u64offset_s64u10__SVBool_tPKau12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -446,17 +434,17 @@ svint64_t test_svldff1sb_gather_u64offset_s64(svbool_t pg, const int8_t *base, s
    return SVE_ACLE_FUNC(svldff1sb_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sb_gather_u32offset_u32u10__SVBool_tPKau12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sb_gather_u32offset_u32u10__SVBool_tPKau12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -464,17 +452,17 @@ svuint32_t test_svldff1sb_gather_u32offset_u32(svbool_t pg, const int8_t *base,
    return SVE_ACLE_FUNC(svldff1sb_gather_, u32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sb_gather_u64offset_u64u10__SVBool_tPKau12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sb_gather_u64offset_u64u10__SVBool_tPKau12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -482,14 +470,14 @@ svuint64_t test_svldff1sb_gather_u64offset_u64(svbool_t pg, const int8_t *base,
    return SVE_ACLE_FUNC(svldff1sb_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sb_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sb_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -500,14 +488,14 @@ svint32_t test_svldff1sb_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases
    return SVE_ACLE_FUNC(svldff1sb_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sb_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sb_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -518,14 +506,14 @@ svint64_t test_svldff1sb_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
    return SVE_ACLE_FUNC(svldff1sb_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sb_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sb_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -536,14 +524,14 @@ svuint32_t test_svldff1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t base
    return SVE_ACLE_FUNC(svldff1sb_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1sb_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sb_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sb_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sb_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c

index f1cbb79..95e9ce1 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,17 +14,17 @@
  #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldff1sh_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sh_s32u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sh_s32u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -33,17 +33,17 @@ svint32_t test_svldff1sh_s32(svbool_t pg, const int16_t *base)
    return svldff1sh_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sh_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sh_s64u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sh_s64u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -52,17 +52,17 @@ svint64_t test_svldff1sh_s64(svbool_t pg, const int16_t *base)
    return svldff1sh_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sh_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sh_u32u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sh_u32u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -71,17 +71,17 @@ svuint32_t test_svldff1sh_u32(svbool_t pg, const int16_t *base)
    return svldff1sh_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sh_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sh_u64u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sh_u64u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -90,106 +90,98 @@ svuint64_t test_svldff1sh_u64(svbool_t pg, const int16_t *base)
    return svldff1sh_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sh_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sh_vnum_s32u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sh_vnum_s32u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svldff1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svldff1sh_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sh_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sh_vnum_s64u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sh_vnum_s64u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldff1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svldff1sh_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sh_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sh_vnum_u32u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sh_vnum_u32u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svldff1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svldff1sh_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sh_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sh_vnum_u64u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sh_vnum_u64u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldff1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svldff1sh_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sh_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sh_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -200,14 +192,14 @@ svint32_t test_svldff1sh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sh_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sh_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -218,14 +210,14 @@ svint64_t test_svldff1sh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sh_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sh_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -236,14 +228,14 @@ svuint32_t test_svldff1sh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sh_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sh_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -254,17 +246,17 @@ svuint64_t test_svldff1sh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sh_gather_s32offset_s32u10__SVBool_tPKsu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sh_gather_s32offset_s32u10__SVBool_tPKsu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -272,17 +264,17 @@ svint32_t test_svldff1sh_gather_s32offset_s32(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, s32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sh_gather_s64offset_s64u10__SVBool_tPKsu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sh_gather_s64offset_s64u10__SVBool_tPKsu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -290,17 +282,17 @@ svint64_t test_svldff1sh_gather_s64offset_s64(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sh_gather_s32offset_u32u10__SVBool_tPKsu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sh_gather_s32offset_u32u10__SVBool_tPKsu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -308,17 +300,17 @@ svuint32_t test_svldff1sh_gather_s32offset_u32(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, s32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sh_gather_s64offset_u64u10__SVBool_tPKsu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sh_gather_s64offset_u64u10__SVBool_tPKsu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -326,17 +318,17 @@ svuint64_t test_svldff1sh_gather_s64offset_u64(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sh_gather_u32offset_s32u10__SVBool_tPKsu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sh_gather_u32offset_s32u10__SVBool_tPKsu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -344,17 +336,17 @@ svint32_t test_svldff1sh_gather_u32offset_s32(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, u32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sh_gather_u64offset_s64u10__SVBool_tPKsu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sh_gather_u64offset_s64u10__SVBool_tPKsu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -362,17 +354,17 @@ svint64_t test_svldff1sh_gather_u64offset_s64(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sh_gather_u32offset_u32u10__SVBool_tPKsu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sh_gather_u32offset_u32u10__SVBool_tPKsu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -380,17 +372,17 @@ svuint32_t test_svldff1sh_gather_u32offset_u32(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, u32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sh_gather_u64offset_u64u10__SVBool_tPKsu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sh_gather_u64offset_u64u10__SVBool_tPKsu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -398,14 +390,14 @@ svuint64_t test_svldff1sh_gather_u64offset_u64(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -416,14 +408,14 @@ svint32_t test_svldff1sh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases
    return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -434,14 +426,14 @@ svint64_t test_svldff1sh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
    return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -452,14 +444,14 @@ svuint32_t test_svldff1sh_gather_u32base_offset_u32(svbool_t pg, svuint32_t base
    return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -470,17 +462,17 @@ svuint64_t test_svldff1sh_gather_u64base_offset_u64(svbool_t pg, svuint64_t base
    return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_s32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_s32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sh_gather_s32index_s32u10__SVBool_tPKsu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sh_gather_s32index_s32u10__SVBool_tPKsu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -488,17 +480,17 @@ svint32_t test_svldff1sh_gather_s32index_s32(svbool_t pg, const int16_t *base, s
    return SVE_ACLE_FUNC(svldff1sh_gather_, s32, index_s32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sh_gather_s64index_s64u10__SVBool_tPKsu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sh_gather_s64index_s64u10__SVBool_tPKsu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -506,17 +498,17 @@ svint64_t test_svldff1sh_gather_s64index_s64(svbool_t pg, const int16_t *base, s
    return SVE_ACLE_FUNC(svldff1sh_gather_, s64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_s32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_s32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sh_gather_s32index_u32u10__SVBool_tPKsu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sh_gather_s32index_u32u10__SVBool_tPKsu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -524,17 +516,17 @@ svuint32_t test_svldff1sh_gather_s32index_u32(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, s32, index_u32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sh_gather_s64index_u64u10__SVBool_tPKsu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sh_gather_s64index_u64u10__SVBool_tPKsu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -542,17 +534,17 @@ svuint64_t test_svldff1sh_gather_s64index_u64(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, s64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sh_gather_u32index_s32u10__SVBool_tPKsu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sh_gather_u32index_s32u10__SVBool_tPKsu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -560,17 +552,17 @@ svint32_t test_svldff1sh_gather_u32index_s32(svbool_t pg, const int16_t *base, s
    return SVE_ACLE_FUNC(svldff1sh_gather_, u32, index_s32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sh_gather_u64index_s64u10__SVBool_tPKsu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sh_gather_u64index_s64u10__SVBool_tPKsu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -578,17 +570,17 @@ svint64_t test_svldff1sh_gather_u64index_s64(svbool_t pg, const int16_t *base, s
    return SVE_ACLE_FUNC(svldff1sh_gather_, u64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sh_gather_u32index_u32u10__SVBool_tPKsu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sh_gather_u32index_u32u10__SVBool_tPKsu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -596,17 +588,17 @@ svuint32_t test_svldff1sh_gather_u32index_u32(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, u32, index_u32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sh_gather_u64index_u64u10__SVBool_tPKsu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sh_gather_u64index_u64u10__SVBool_tPKsu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -614,7 +606,7 @@ svuint64_t test_svldff1sh_gather_u64index_u64(svbool_t pg, const int16_t *base,
    return SVE_ACLE_FUNC(svldff1sh_gather_, u64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32base_index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32base_index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -622,7 +614,7 @@ svuint64_t test_svldff1sh_gather_u64index_u64(svbool_t pg, const int16_t *base,
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1sh_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1sh_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -634,7 +626,7 @@ svint32_t test_svldff1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _index_s32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -642,7 +634,7 @@ svint32_t test_svldff1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases,
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1sh_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1sh_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -654,7 +646,7 @@ svint64_t test_svldff1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svldff1sh_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u32base_index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u32base_index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -662,7 +654,7 @@ svint64_t test_svldff1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases,
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1sh_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1sh_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -674,7 +666,7 @@ svuint32_t test_svldff1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases
    return SVE_ACLE_FUNC(svldff1sh_gather, _u32base, _index_u32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1sh_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sh_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -682,7 +674,7 @@ svuint32_t test_svldff1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1sh_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1sh_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c

index 62f076a..3bbd429 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,17 +14,17 @@
  #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldff1sw_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sw_s64u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sw_s64u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -33,17 +33,17 @@ svint64_t test_svldff1sw_s64(svbool_t pg, const int32_t *base)
    return svldff1sw_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sw_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1sw_u64u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1sw_u64u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -52,60 +52,56 @@ svuint64_t test_svldff1sw_u64(svbool_t pg, const int32_t *base)
    return svldff1sw_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1sw_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sw_vnum_s64u10__SVBool_tPKil(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sw_vnum_s64u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldff1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return svldff1sw_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sw_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1sw_vnum_u64u10__SVBool_tPKil(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1sw_vnum_u64u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldff1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return svldff1sw_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sw_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sw_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -116,14 +112,14 @@ svint64_t test_svldff1sw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1sw_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1sw_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -134,17 +130,17 @@ svuint64_t test_svldff1sw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sw_gather_s64offset_s64u10__SVBool_tPKiu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sw_gather_s64offset_s64u10__SVBool_tPKiu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -152,17 +148,17 @@ svint64_t test_svldff1sw_gather_s64offset_s64(svbool_t pg, const int32_t *base,
    return SVE_ACLE_FUNC(svldff1sw_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sw_gather_s64offset_u64u10__SVBool_tPKiu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sw_gather_s64offset_u64u10__SVBool_tPKiu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -170,17 +166,17 @@ svuint64_t test_svldff1sw_gather_s64offset_u64(svbool_t pg, const int32_t *base,
    return SVE_ACLE_FUNC(svldff1sw_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sw_gather_u64offset_s64u10__SVBool_tPKiu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sw_gather_u64offset_s64u10__SVBool_tPKiu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -188,17 +184,17 @@ svint64_t test_svldff1sw_gather_u64offset_s64(svbool_t pg, const int32_t *base,
    return SVE_ACLE_FUNC(svldff1sw_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1sw_gather_u64offset_u64u10__SVBool_tPKiu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1sw_gather_u64offset_u64u10__SVBool_tPKiu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -206,14 +202,14 @@ svuint64_t test_svldff1sw_gather_u64offset_u64(svbool_t pg, const int32_t *base,
    return SVE_ACLE_FUNC(svldff1sw_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -224,14 +220,14 @@ svint64_t test_svldff1sw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
    return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1sw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1sw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -242,17 +238,17 @@ svuint64_t test_svldff1sw_gather_u64base_offset_u64(svbool_t pg, svuint64_t base
    return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sw_gather_s64index_s64u10__SVBool_tPKiu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sw_gather_s64index_s64u10__SVBool_tPKiu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -260,17 +256,17 @@ svint64_t test_svldff1sw_gather_s64index_s64(svbool_t pg, const int32_t *base, s
    return SVE_ACLE_FUNC(svldff1sw_gather_, s64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sw_gather_s64index_u64u10__SVBool_tPKiu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sw_gather_s64index_u64u10__SVBool_tPKiu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -278,17 +274,17 @@ svuint64_t test_svldff1sw_gather_s64index_u64(svbool_t pg, const int32_t *base,
    return SVE_ACLE_FUNC(svldff1sw_gather_, s64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sw_gather_u64index_s64u10__SVBool_tPKiu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sw_gather_u64index_s64u10__SVBool_tPKiu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -296,17 +292,17 @@ svint64_t test_svldff1sw_gather_u64index_s64(svbool_t pg, const int32_t *base, s
    return SVE_ACLE_FUNC(svldff1sw_gather_, u64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1sw_gather_u64index_u64u10__SVBool_tPKiu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1sw_gather_u64index_u64u10__SVBool_tPKiu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -314,7 +310,7 @@ svuint64_t test_svldff1sw_gather_u64index_u64(svbool_t pg, const int32_t *base,
    return SVE_ACLE_FUNC(svldff1sw_gather_, u64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -322,7 +318,7 @@ svuint64_t test_svldff1sw_gather_u64index_u64(svbool_t pg, const int32_t *base,
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1sw_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1sw_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -334,7 +330,7 @@ svint64_t test_svldff1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svldff1sw_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1sw_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1sw_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -342,7 +338,7 @@ svint64_t test_svldff1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases,
  // CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1sw_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1sw_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c

index cd33131..942e53f 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,17 +14,17 @@
  #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldff1ub_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1ub_s16u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1ub_s16u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
@@ -33,17 +33,17 @@ svint16_t test_svldff1ub_s16(svbool_t pg, const uint8_t *base)
    return svldff1ub_s16(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1ub_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1ub_s32u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1ub_s32u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -52,17 +52,17 @@ svint32_t test_svldff1ub_s32(svbool_t pg, const uint8_t *base)
    return svldff1ub_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1ub_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1ub_s64u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1ub_s64u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -71,17 +71,17 @@ svint64_t test_svldff1ub_s64(svbool_t pg, const uint8_t *base)
    return svldff1ub_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1ub_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1ub_u16u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1ub_u16u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
@@ -90,17 +90,17 @@ svuint16_t test_svldff1ub_u16(svbool_t pg, const uint8_t *base)
    return svldff1ub_u16(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1ub_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1ub_u32u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1ub_u32u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -109,17 +109,17 @@ svuint32_t test_svldff1ub_u32(svbool_t pg, const uint8_t *base)
    return svldff1ub_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1ub_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1ub_u64u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1ub_u64u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -128,152 +128,140 @@ svuint64_t test_svldff1ub_u64(svbool_t pg, const uint8_t *base)
    return svldff1ub_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1ub_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1ub_vnum_s16u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1ub_vnum_s16u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svint16_t test_svldff1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldff1ub_vnum_s16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1ub_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1ub_vnum_s32u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1ub_vnum_s32u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svldff1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldff1ub_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1ub_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1ub_vnum_s64u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1ub_vnum_s64u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldff1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldff1ub_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1ub_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1ub_vnum_u16u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1ub_vnum_u16u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svuint16_t test_svldff1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldff1ub_vnum_u16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1ub_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1ub_vnum_u32u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1ub_vnum_u32u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svldff1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldff1ub_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1ub_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1ub_vnum_u64u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1ub_vnum_u64u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldff1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldff1ub_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1ub_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1ub_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -284,14 +272,14 @@ svint32_t test_svldff1ub_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1ub_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1ub_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1ub_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -302,14 +290,14 @@ svint64_t test_svldff1ub_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1ub_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1ub_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1ub_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -320,14 +308,14 @@ svuint32_t test_svldff1ub_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1ub_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1ub_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1ub_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -338,17 +326,17 @@ svuint64_t test_svldff1ub_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1ub_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1ub_gather_s32offset_s32u10__SVBool_tPKhu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1ub_gather_s32offset_s32u10__SVBool_tPKhu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -356,17 +344,17 @@ svint32_t test_svldff1ub_gather_s32offset_s32(svbool_t pg, const uint8_t *base,
    return SVE_ACLE_FUNC(svldff1ub_gather_, s32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1ub_gather_s64offset_s64u10__SVBool_tPKhu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1ub_gather_s64offset_s64u10__SVBool_tPKhu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -374,17 +362,17 @@ svint64_t test_svldff1ub_gather_s64offset_s64(svbool_t pg, const uint8_t *base,
    return SVE_ACLE_FUNC(svldff1ub_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1ub_gather_s32offset_u32u10__SVBool_tPKhu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1ub_gather_s32offset_u32u10__SVBool_tPKhu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -392,17 +380,17 @@ svuint32_t test_svldff1ub_gather_s32offset_u32(svbool_t pg, const uint8_t *base,
    return SVE_ACLE_FUNC(svldff1ub_gather_, s32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1ub_gather_s64offset_u64u10__SVBool_tPKhu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1ub_gather_s64offset_u64u10__SVBool_tPKhu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -410,17 +398,17 @@ svuint64_t test_svldff1ub_gather_s64offset_u64(svbool_t pg, const uint8_t *base,
    return SVE_ACLE_FUNC(svldff1ub_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1ub_gather_u32offset_s32u10__SVBool_tPKhu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1ub_gather_u32offset_s32u10__SVBool_tPKhu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -428,17 +416,17 @@ svint32_t test_svldff1ub_gather_u32offset_s32(svbool_t pg, const uint8_t *base,
    return SVE_ACLE_FUNC(svldff1ub_gather_, u32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1ub_gather_u64offset_s64u10__SVBool_tPKhu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1ub_gather_u64offset_s64u10__SVBool_tPKhu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -446,17 +434,17 @@ svint64_t test_svldff1ub_gather_u64offset_s64(svbool_t pg, const uint8_t *base,
    return SVE_ACLE_FUNC(svldff1ub_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1ub_gather_u32offset_u32u10__SVBool_tPKhu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1ub_gather_u32offset_u32u10__SVBool_tPKhu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -464,17 +452,17 @@ svuint32_t test_svldff1ub_gather_u32offset_u32(svbool_t pg, const uint8_t *base,
    return SVE_ACLE_FUNC(svldff1ub_gather_, u32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1ub_gather_u64offset_u64u10__SVBool_tPKhu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1ub_gather_u64offset_u64u10__SVBool_tPKhu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -482,14 +470,14 @@ svuint64_t test_svldff1ub_gather_u64offset_u64(svbool_t pg, const uint8_t *base,
    return SVE_ACLE_FUNC(svldff1ub_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1ub_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1ub_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -500,14 +488,14 @@ svint32_t test_svldff1ub_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases
    return SVE_ACLE_FUNC(svldff1ub_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1ub_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1ub_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -518,14 +506,14 @@ svint64_t test_svldff1ub_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
    return SVE_ACLE_FUNC(svldff1ub_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1ub_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1ub_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -536,14 +524,14 @@ svuint32_t test_svldff1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t base
    return SVE_ACLE_FUNC(svldff1ub_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1ub_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1ub_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1ub_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1ub_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c

index 8fc41ac..d967bf1 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,17 +14,17 @@
  #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldff1uh_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1uh_s32u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1uh_s32u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -33,17 +33,17 @@ svint32_t test_svldff1uh_s32(svbool_t pg, const uint16_t *base)
    return svldff1uh_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1uh_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1uh_s64u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1uh_s64u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -52,17 +52,17 @@ svint64_t test_svldff1uh_s64(svbool_t pg, const uint16_t *base)
    return svldff1uh_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1uh_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1uh_u32u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1uh_u32u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -71,17 +71,17 @@ svuint32_t test_svldff1uh_u32(svbool_t pg, const uint16_t *base)
    return svldff1uh_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1uh_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1uh_u64u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1uh_u64u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -90,106 +90,98 @@ svuint64_t test_svldff1uh_u64(svbool_t pg, const uint16_t *base)
    return svldff1uh_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1uh_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1uh_vnum_s32u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1uh_vnum_s32u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svldff1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svldff1uh_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1uh_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1uh_vnum_s64u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1uh_vnum_s64u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldff1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svldff1uh_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1uh_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1uh_vnum_u32u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1uh_vnum_u32u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svldff1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svldff1uh_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1uh_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1uh_vnum_u64u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1uh_vnum_u64u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldff1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svldff1uh_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1uh_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1uh_gather_u32base_s32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -200,14 +192,14 @@ svint32_t test_svldff1uh_gather_u32base_s32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _s32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1uh_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1uh_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -218,14 +210,14 @@ svint64_t test_svldff1uh_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1uh_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1uh_gather_u32base_u32u10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -236,14 +228,14 @@ svuint32_t test_svldff1uh_gather_u32base_u32(svbool_t pg, svuint32_t bases) {
    return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _u32, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1uh_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1uh_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -254,17 +246,17 @@ svuint64_t test_svldff1uh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uh_gather_s32offset_s32u10__SVBool_tPKtu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uh_gather_s32offset_s32u10__SVBool_tPKtu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -272,17 +264,17 @@ svint32_t test_svldff1uh_gather_s32offset_s32(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, s32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uh_gather_s64offset_s64u10__SVBool_tPKtu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uh_gather_s64offset_s64u10__SVBool_tPKtu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -290,17 +282,17 @@ svint64_t test_svldff1uh_gather_s64offset_s64(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uh_gather_s32offset_u32u10__SVBool_tPKtu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uh_gather_s32offset_u32u10__SVBool_tPKtu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -308,17 +300,17 @@ svuint32_t test_svldff1uh_gather_s32offset_u32(svbool_t pg, const uint16_t *base
    return SVE_ACLE_FUNC(svldff1uh_gather_, s32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uh_gather_s64offset_u64u10__SVBool_tPKtu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uh_gather_s64offset_u64u10__SVBool_tPKtu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -326,17 +318,17 @@ svuint64_t test_svldff1uh_gather_s64offset_u64(svbool_t pg, const uint16_t *base
    return SVE_ACLE_FUNC(svldff1uh_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uh_gather_u32offset_s32u10__SVBool_tPKtu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uh_gather_u32offset_s32u10__SVBool_tPKtu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -344,17 +336,17 @@ svint32_t test_svldff1uh_gather_u32offset_s32(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, u32, offset_s32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uh_gather_u64offset_s64u10__SVBool_tPKtu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uh_gather_u64offset_s64u10__SVBool_tPKtu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -362,17 +354,17 @@ svint64_t test_svldff1uh_gather_u64offset_s64(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uh_gather_u32offset_u32u10__SVBool_tPKtu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uh_gather_u32offset_u32u10__SVBool_tPKtu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -380,17 +372,17 @@ svuint32_t test_svldff1uh_gather_u32offset_u32(svbool_t pg, const uint16_t *base
    return SVE_ACLE_FUNC(svldff1uh_gather_, u32, offset_u32, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uh_gather_u64offset_u64u10__SVBool_tPKtu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uh_gather_u64offset_u64u10__SVBool_tPKtu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -398,14 +390,14 @@ svuint64_t test_svldff1uh_gather_u64offset_u64(svbool_t pg, const uint16_t *base
    return SVE_ACLE_FUNC(svldff1uh_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1uh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1uh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -416,14 +408,14 @@ svint32_t test_svldff1uh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases
    return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _offset_s32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1uh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1uh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -434,14 +426,14 @@ svint64_t test_svldff1uh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
    return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1uh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1uh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -452,14 +444,14 @@ svuint32_t test_svldff1uh_gather_u32base_offset_u32(svbool_t pg, svuint32_t base
    return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _offset_u32, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1uh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1uh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -470,17 +462,17 @@ svuint64_t test_svldff1uh_gather_u64base_offset_u64(svbool_t pg, svuint64_t base
    return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_s32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_s32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uh_gather_s32index_s32u10__SVBool_tPKtu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uh_gather_s32index_s32u10__SVBool_tPKtu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -488,17 +480,17 @@ svint32_t test_svldff1uh_gather_s32index_s32(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, s32, index_s32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uh_gather_s64index_s64u10__SVBool_tPKtu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uh_gather_s64index_s64u10__SVBool_tPKtu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -506,17 +498,17 @@ svint64_t test_svldff1uh_gather_s64index_s64(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, s64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_s32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_s32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uh_gather_s32index_u32u10__SVBool_tPKtu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uh_gather_s32index_u32u10__SVBool_tPKtu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -524,17 +516,17 @@ svuint32_t test_svldff1uh_gather_s32index_u32(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, s32, index_u32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uh_gather_s64index_u64u10__SVBool_tPKtu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uh_gather_s64index_u64u10__SVBool_tPKtu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -542,17 +534,17 @@ svuint64_t test_svldff1uh_gather_s64index_u64(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, s64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uh_gather_u32index_s32u10__SVBool_tPKtu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uh_gather_u32index_s32u10__SVBool_tPKtu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -560,17 +552,17 @@ svint32_t test_svldff1uh_gather_u32index_s32(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, u32, index_s32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uh_gather_u64index_s64u10__SVBool_tPKtu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uh_gather_u64index_s64u10__SVBool_tPKtu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -578,17 +570,17 @@ svint64_t test_svldff1uh_gather_u64index_s64(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, u64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uh_gather_u32index_u32u10__SVBool_tPKtu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uh_gather_u32index_u32u10__SVBool_tPKtu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -596,17 +588,17 @@ svuint32_t test_svldff1uh_gather_u32index_u32(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, u32, index_u32, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uh_gather_u64index_u64u10__SVBool_tPKtu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uh_gather_u64index_u64u10__SVBool_tPKtu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -614,7 +606,7 @@ svuint64_t test_svldff1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base,
    return SVE_ACLE_FUNC(svldff1uh_gather_, u64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32base_index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32base_index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -622,7 +614,7 @@ svuint64_t test_svldff1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base,
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1uh_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1uh_gather_u32base_index_s32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -634,7 +626,7 @@ svint32_t test_svldff1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases,
    return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _index_s32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -642,7 +634,7 @@ svint32_t test_svldff1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases,
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1uh_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1uh_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -654,7 +646,7 @@ svint64_t test_svldff1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svldff1uh_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u32base_index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u32base_index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -662,7 +654,7 @@ svint64_t test_svldff1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases,
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1uh_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1uh_gather_u32base_index_u32u10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -674,7 +666,7 @@ svuint32_t test_svldff1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases
    return SVE_ACLE_FUNC(svldff1uh_gather, _u32base, _index_u32, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1uh_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uh_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
@@ -682,7 +674,7 @@ svuint32_t test_svldff1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1uh_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1uh_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 1
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c

index 858bc5a..e59f6af 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,17 +14,17 @@
  #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldff1uw_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1uw_s64u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1uw_s64u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -33,17 +33,17 @@ svint64_t test_svldff1uw_s64(svbool_t pg, const uint32_t *base)
    return svldff1uw_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1uw_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldff1uw_u64u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldff1uw_u64u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -52,60 +52,56 @@ svuint64_t test_svldff1uw_u64(svbool_t pg, const uint32_t *base)
    return svldff1uw_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldff1uw_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1uw_vnum_s64u10__SVBool_tPKjl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1uw_vnum_s64u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldff1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return svldff1uw_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1uw_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldff1uw_vnum_u64u10__SVBool_tPKjl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldff1uw_vnum_u64u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldff1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return svldff1uw_vnum_u64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1uw_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1uw_gather_u64base_s64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -116,14 +112,14 @@ svint64_t test_svldff1uw_gather_u64base_s64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _s64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z33test_svldff1uw_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svldff1uw_gather_u64base_u64u10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -134,17 +130,17 @@ svuint64_t test_svldff1uw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
    return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _u64, )(pg, bases);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uw_gather_s64offset_s64u10__SVBool_tPKju11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uw_gather_s64offset_s64u10__SVBool_tPKju11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -152,17 +148,17 @@ svint64_t test_svldff1uw_gather_s64offset_s64(svbool_t pg, const uint32_t *base,
    return SVE_ACLE_FUNC(svldff1uw_gather_, s64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uw_gather_s64offset_u64u10__SVBool_tPKju11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uw_gather_s64offset_u64u10__SVBool_tPKju11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -170,17 +166,17 @@ svuint64_t test_svldff1uw_gather_s64offset_u64(svbool_t pg, const uint32_t *base
    return SVE_ACLE_FUNC(svldff1uw_gather_, s64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uw_gather_u64offset_s64u10__SVBool_tPKju12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uw_gather_u64offset_s64u10__SVBool_tPKju12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -188,17 +184,17 @@ svint64_t test_svldff1uw_gather_u64offset_s64(svbool_t pg, const uint32_t *base,
    return SVE_ACLE_FUNC(svldff1uw_gather_, u64, offset_s64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z35test_svldff1uw_gather_u64offset_u64u10__SVBool_tPKju12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z35test_svldff1uw_gather_u64offset_u64u10__SVBool_tPKju12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -206,14 +202,14 @@ svuint64_t test_svldff1uw_gather_u64offset_u64(svbool_t pg, const uint32_t *base
    return SVE_ACLE_FUNC(svldff1uw_gather_, u64, offset_u64, )(pg, base, offsets);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1uw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1uw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -224,14 +220,14 @@ svint64_t test_svldff1uw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
    return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _offset_s64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z40test_svldff1uw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z40test_svldff1uw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -242,17 +238,17 @@ svuint64_t test_svldff1uw_gather_u64base_offset_u64(svbool_t pg, svuint64_t base
    return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _offset_u64, )(pg, bases, offset);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uw_gather_s64index_s64u10__SVBool_tPKju11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uw_gather_s64index_s64u10__SVBool_tPKju11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -260,17 +256,17 @@ svint64_t test_svldff1uw_gather_s64index_s64(svbool_t pg, const uint32_t *base,
    return SVE_ACLE_FUNC(svldff1uw_gather_, s64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uw_gather_s64index_u64u10__SVBool_tPKju11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uw_gather_s64index_u64u10__SVBool_tPKju11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -278,17 +274,17 @@ svuint64_t test_svldff1uw_gather_s64index_u64(svbool_t pg, const uint32_t *base,
    return SVE_ACLE_FUNC(svldff1uw_gather_, s64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uw_gather_u64index_s64u10__SVBool_tPKju12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uw_gather_u64index_s64u10__SVBool_tPKju12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -296,17 +292,17 @@ svint64_t test_svldff1uw_gather_u64index_s64(svbool_t pg, const uint32_t *base,
    return SVE_ACLE_FUNC(svldff1uw_gather_, u64, index_s64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z34test_svldff1uw_gather_u64index_u64u10__SVBool_tPKju12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z34test_svldff1uw_gather_u64index_u64u10__SVBool_tPKju12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -314,7 +310,7 @@ svuint64_t test_svldff1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base,
    return SVE_ACLE_FUNC(svldff1uw_gather_, u64, index_u64, )(pg, base, indices);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -322,7 +318,7 @@ svuint64_t test_svldff1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base,
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1uw_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1uw_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -334,7 +330,7 @@ svint64_t test_svldff1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases,
    return SVE_ACLE_FUNC(svldff1uw_gather, _u64base, _index_s64, )(pg, bases, index);
  }
  
-// CHECK-LABEL: @test_svldff1uw_gather_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldff1uw_gather_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -342,7 +338,7 @@ svint64_t test_svldff1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases,
  // CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z39test_svldff1uw_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z39test_svldff1uw_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1-bfloat.c

index c5f7930..7a396b9 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  // REQUIRES: aarch64-registered-target
  
  #include <arm_sve.h>
@@ -14,16 +14,16 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldnf1_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z17test_svldnf1_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z17test_svldnf1_bf16u10__SVBool_tPKu6__bf16(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
  //
  svbfloat16_t test_svldnf1_bf16(svbool_t pg, const bfloat16_t *base)
@@ -31,21 +31,19 @@ svbfloat16_t test_svldnf1_bf16(svbool_t pg, const bfloat16_t *base)
    return SVE_ACLE_FUNC(svldnf1,_bf16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z22test_svldnf1_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z22test_svldnf1_vnum_bf16u10__SVBool_tPKu6__bf16l(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
  //
  svbfloat16_t test_svldnf1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c

index c6e03d0..194f327 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,14 +14,14 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldnf1_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svldnf1_s8u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svldnf1_s8u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
  svint8_t test_svldnf1_s8(svbool_t pg, const int8_t *base)
@@ -29,16 +29,16 @@ svint8_t test_svldnf1_s8(svbool_t pg, const int8_t *base)
    return SVE_ACLE_FUNC(svldnf1,_s8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnf1_s16u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnf1_s16u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
  svint16_t test_svldnf1_s16(svbool_t pg, const int16_t *base)
@@ -46,16 +46,16 @@ svint16_t test_svldnf1_s16(svbool_t pg, const int16_t *base)
    return SVE_ACLE_FUNC(svldnf1,_s16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnf1_s32u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnf1_s32u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svldnf1_s32(svbool_t pg, const int32_t *base)
@@ -63,16 +63,16 @@ svint32_t test_svldnf1_s32(svbool_t pg, const int32_t *base)
    return SVE_ACLE_FUNC(svldnf1,_s32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnf1_s64u10__SVBool_tPKl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnf1_s64u10__SVBool_tPKl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svldnf1_s64(svbool_t pg, const int64_t *base)
@@ -80,14 +80,14 @@ svint64_t test_svldnf1_s64(svbool_t pg, const int64_t *base)
    return SVE_ACLE_FUNC(svldnf1,_s64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svldnf1_u8u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svldnf1_u8u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
  svuint8_t test_svldnf1_u8(svbool_t pg, const uint8_t *base)
@@ -95,16 +95,16 @@ svuint8_t test_svldnf1_u8(svbool_t pg, const uint8_t *base)
    return SVE_ACLE_FUNC(svldnf1,_u8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnf1_u16u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnf1_u16u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
  svuint16_t test_svldnf1_u16(svbool_t pg, const uint16_t *base)
@@ -112,16 +112,16 @@ svuint16_t test_svldnf1_u16(svbool_t pg, const uint16_t *base)
    return SVE_ACLE_FUNC(svldnf1,_u16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnf1_u32u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnf1_u32u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svldnf1_u32(svbool_t pg, const uint32_t *base)
@@ -129,16 +129,16 @@ svuint32_t test_svldnf1_u32(svbool_t pg, const uint32_t *base)
    return SVE_ACLE_FUNC(svldnf1,_u32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnf1_u64u10__SVBool_tPKm(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnf1_u64u10__SVBool_tPKm(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svldnf1_u64(svbool_t pg, const uint64_t *base)
@@ -146,16 +146,16 @@ svuint64_t test_svldnf1_u64(svbool_t pg, const uint64_t *base)
    return SVE_ACLE_FUNC(svldnf1,_u64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnf1_f16u10__SVBool_tPKDh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnf1_f16u10__SVBool_tPKDh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
  //
  svfloat16_t test_svldnf1_f16(svbool_t pg, const float16_t *base)
@@ -163,16 +163,16 @@ svfloat16_t test_svldnf1_f16(svbool_t pg, const float16_t *base)
    return SVE_ACLE_FUNC(svldnf1,_f16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnf1_f32u10__SVBool_tPKf(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnf1_f32u10__SVBool_tPKf(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svldnf1_f32(svbool_t pg, const float32_t *base)
@@ -180,16 +180,16 @@ svfloat32_t test_svldnf1_f32(svbool_t pg, const float32_t *base)
    return SVE_ACLE_FUNC(svldnf1,_f32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnf1_f64u10__SVBool_tPKd(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnf1_f64u10__SVBool_tPKd(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svldnf1_f64(svbool_t pg, const float64_t *base)
@@ -197,227 +197,205 @@ svfloat64_t test_svldnf1_f64(svbool_t pg, const float64_t *base)
    return SVE_ACLE_FUNC(svldnf1,_f64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svldnf1_vnum_s8u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svldnf1_vnum_s8u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
  svint8_t test_svldnf1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_s8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_s16u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnf1_vnum_s16u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svint16_t test_svldnf1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_s16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_s32u10__SVBool_tPKil(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnf1_vnum_s32u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svint32_t test_svldnf1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_s32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_s64u10__SVBool_tPKll(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnf1_vnum_s64u10__SVBool_tPKll(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svldnf1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_s64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svldnf1_vnum_u8u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svldnf1_vnum_u8u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
  svuint8_t test_svldnf1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_u8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_u16u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnf1_vnum_u16u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svuint16_t test_svldnf1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_u16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_u32u10__SVBool_tPKjl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnf1_vnum_u32u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svuint32_t test_svldnf1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_u32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_u64u10__SVBool_tPKml(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnf1_vnum_u64u10__SVBool_tPKml(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svldnf1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_u64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_f16u10__SVBool_tPKDhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnf1_vnum_f16u10__SVBool_tPKDhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
  //
  svfloat16_t test_svldnf1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_f16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_f32u10__SVBool_tPKfl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnf1_vnum_f32u10__SVBool_tPKfl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
  svfloat32_t test_svldnf1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnf1_vnum,_f32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_f64u10__SVBool_tPKdl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnf1_vnum_f64u10__SVBool_tPKdl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
  svfloat64_t test_svldnf1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sb.c

index f9ca632..3b34f60 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sb.c
@@ -1,21 +1,21 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
-// CHECK-LABEL: @test_svldnf1sb_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sb_s16u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sb_s16u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
@@ -24,17 +24,17 @@ svint16_t test_svldnf1sb_s16(svbool_t pg, const int8_t *base)
    return svldnf1sb_s16(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sb_s32u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sb_s32u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -43,17 +43,17 @@ svint32_t test_svldnf1sb_s32(svbool_t pg, const int8_t *base)
    return svldnf1sb_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sb_s64u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sb_s64u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -62,17 +62,17 @@ svint64_t test_svldnf1sb_s64(svbool_t pg, const int8_t *base)
    return svldnf1sb_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sb_u16u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sb_u16u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
@@ -81,17 +81,17 @@ svuint16_t test_svldnf1sb_u16(svbool_t pg, const int8_t *base)
    return svldnf1sb_u16(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sb_u32u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sb_u32u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -100,17 +100,17 @@ svuint32_t test_svldnf1sb_u32(svbool_t pg, const int8_t *base)
    return svldnf1sb_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sb_u64u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sb_u64u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -119,138 +119,126 @@ svuint64_t test_svldnf1sb_u64(svbool_t pg, const int8_t *base)
    return svldnf1sb_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sb_vnum_s16u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sb_vnum_s16u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svint16_t test_svldnf1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldnf1sb_vnum_s16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sb_vnum_s32u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sb_vnum_s32u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svldnf1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldnf1sb_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sb_vnum_s64u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sb_vnum_s64u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldnf1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldnf1sb_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sb_vnum_u16u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sb_vnum_u16u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svuint16_t test_svldnf1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldnf1sb_vnum_u16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sb_vnum_u32u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sb_vnum_u32u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svldnf1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return svldnf1sb_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1sb_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sb_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sb_vnum_u64u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sb_vnum_u64u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldnf1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c

index 1d9914c..9e7ae47 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c
@@ -1,21 +1,21 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
-// CHECK-LABEL: @test_svldnf1sh_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sh_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sh_s32u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sh_s32u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -24,17 +24,17 @@ svint32_t test_svldnf1sh_s32(svbool_t pg, const int16_t *base)
    return svldnf1sh_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sh_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sh_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sh_s64u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sh_s64u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -43,17 +43,17 @@ svint64_t test_svldnf1sh_s64(svbool_t pg, const int16_t *base)
    return svldnf1sh_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sh_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sh_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sh_u32u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sh_u32u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -62,17 +62,17 @@ svuint32_t test_svldnf1sh_u32(svbool_t pg, const int16_t *base)
    return svldnf1sh_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sh_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sh_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sh_u64u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sh_u64u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -81,92 +81,84 @@ svuint64_t test_svldnf1sh_u64(svbool_t pg, const int16_t *base)
    return svldnf1sh_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sh_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sh_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sh_vnum_s32u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sh_vnum_s32u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svldnf1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svldnf1sh_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1sh_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sh_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sh_vnum_s64u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sh_vnum_s64u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldnf1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svldnf1sh_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1sh_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sh_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sh_vnum_u32u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sh_vnum_u32u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svldnf1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return svldnf1sh_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1sh_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sh_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sh_vnum_u64u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sh_vnum_u64u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldnf1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c

index 7458033..3de3a7e 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c
@@ -1,21 +1,21 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
-// CHECK-LABEL: @test_svldnf1sw_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sw_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sw_s64u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sw_s64u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -24,17 +24,17 @@ svint64_t test_svldnf1sw_s64(svbool_t pg, const int32_t *base)
    return svldnf1sw_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sw_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sw_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1sw_u64u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1sw_u64u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -43,46 +43,42 @@ svuint64_t test_svldnf1sw_u64(svbool_t pg, const int32_t *base)
    return svldnf1sw_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1sw_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sw_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sw_vnum_s64u10__SVBool_tPKil(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sw_vnum_s64u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldnf1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return svldnf1sw_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1sw_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1sw_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1sw_vnum_u64u10__SVBool_tPKil(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1sw_vnum_u64u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldnf1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c

index 3253fad..9c9aa42 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c
@@ -1,21 +1,21 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
-// CHECK-LABEL: @test_svldnf1ub_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1ub_s16u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1ub_s16u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
@@ -24,17 +24,17 @@ svint16_t test_svldnf1ub_s16(svbool_t pg, const uint8_t *base)
    return svldnf1ub_s16(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1ub_s32u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1ub_s32u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -43,17 +43,17 @@ svint32_t test_svldnf1ub_s32(svbool_t pg, const uint8_t *base)
    return svldnf1ub_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1ub_s64u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1ub_s64u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -62,17 +62,17 @@ svint64_t test_svldnf1ub_s64(svbool_t pg, const uint8_t *base)
    return svldnf1ub_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1ub_u16u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1ub_u16u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
@@ -81,17 +81,17 @@ svuint16_t test_svldnf1ub_u16(svbool_t pg, const uint8_t *base)
    return svldnf1ub_u16(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1ub_u32u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1ub_u32u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -100,17 +100,17 @@ svuint32_t test_svldnf1ub_u32(svbool_t pg, const uint8_t *base)
    return svldnf1ub_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1ub_u64u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1ub_u64u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -119,138 +119,126 @@ svuint64_t test_svldnf1ub_u64(svbool_t pg, const uint8_t *base)
    return svldnf1ub_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1ub_vnum_s16u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1ub_vnum_s16u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svint16_t test_svldnf1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldnf1ub_vnum_s16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1ub_vnum_s32u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1ub_vnum_s32u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svldnf1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldnf1ub_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1ub_vnum_s64u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1ub_vnum_s64u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldnf1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldnf1ub_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1ub_vnum_u16u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1ub_vnum_u16u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
  //
  svuint16_t test_svldnf1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldnf1ub_vnum_u16(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1ub_vnum_u32u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1ub_vnum_u32u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svldnf1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return svldnf1ub_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1ub_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1ub_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1ub_vnum_u64u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1ub_vnum_u64u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldnf1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c

index c00c6d0..a4cf180 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c
@@ -1,21 +1,21 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
-// CHECK-LABEL: @test_svldnf1uh_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uh_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1uh_s32u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1uh_s32u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -24,17 +24,17 @@ svint32_t test_svldnf1uh_s32(svbool_t pg, const uint16_t *base)
    return svldnf1uh_s32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1uh_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uh_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1uh_s64u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1uh_s64u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -43,17 +43,17 @@ svint64_t test_svldnf1uh_s64(svbool_t pg, const uint16_t *base)
    return svldnf1uh_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1uh_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uh_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1uh_u32u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1uh_u32u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
@@ -62,17 +62,17 @@ svuint32_t test_svldnf1uh_u32(svbool_t pg, const uint16_t *base)
    return svldnf1uh_u32(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1uh_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uh_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1uh_u64u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1uh_u64u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -81,92 +81,84 @@ svuint64_t test_svldnf1uh_u64(svbool_t pg, const uint16_t *base)
    return svldnf1uh_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1uh_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uh_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1uh_vnum_s32u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1uh_vnum_s32u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svint32_t test_svldnf1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svldnf1uh_vnum_s32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1uh_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uh_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1uh_vnum_s64u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1uh_vnum_s64u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldnf1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svldnf1uh_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1uh_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uh_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1uh_vnum_u32u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1uh_vnum_u32u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
  //
  svuint32_t test_svldnf1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return svldnf1uh_vnum_u32(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1uh_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uh_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1uh_vnum_u64u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1uh_vnum_u64u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldnf1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c

index f95714b..e2e8842 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c
@@ -1,21 +1,21 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
-// CHECK-LABEL: @test_svldnf1uw_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uw_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1uw_s64u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1uw_s64u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -24,17 +24,17 @@ svint64_t test_svldnf1uw_s64(svbool_t pg, const uint32_t *base)
    return svldnf1uw_s64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1uw_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uw_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z18test_svldnf1uw_u64u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svldnf1uw_u64u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
@@ -43,46 +43,42 @@ svuint64_t test_svldnf1uw_u64(svbool_t pg, const uint32_t *base)
    return svldnf1uw_u64(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnf1uw_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uw_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1uw_vnum_s64u10__SVBool_tPKjl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1uw_vnum_s64u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svint64_t test_svldnf1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return svldnf1uw_vnum_s64(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnf1uw_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnf1uw_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
-// CPP-CHECK-LABEL: @_Z23test_svldnf1uw_vnum_u64u10__SVBool_tPKjl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z23test_svldnf1uw_vnum_u64u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
  //
  svuint64_t test_svldnf1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c

index f156954..9fb1ac9 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,16 +15,16 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldnt1_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z17test_svldnt1_bf16u10__SVBool_tPKu6__bf16(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z17test_svldnt1_bf16u10__SVBool_tPKu6__bf16(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
  //
  svbfloat16_t test_svldnt1_bf16(svbool_t pg, const bfloat16_t *base)
@@ -32,21 +32,19 @@ svbfloat16_t test_svldnt1_bf16(svbool_t pg, const bfloat16_t *base)
    return SVE_ACLE_FUNC(svldnt1,_bf16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z22test_svldnt1_vnum_bf16u10__SVBool_tPKu6__bf16l(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z22test_svldnt1_vnum_bf16u10__SVBool_tPKu6__bf16l(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
  //
  svbfloat16_t test_svldnt1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c

index bcc97d2..d0213d5 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
  
  #include <arm_sve.h>
  
@@ -15,14 +15,14 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svldnt1_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svldnt1_s8u10__SVBool_tPKa(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svldnt1_s8u10__SVBool_tPKa(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
  svint8_t test_svldnt1_s8(svbool_t pg, const int8_t *base)
@@ -30,16 +30,16 @@ svint8_t test_svldnt1_s8(svbool_t pg, const int8_t *base)
    return SVE_ACLE_FUNC(svldnt1,_s8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnt1_s16u10__SVBool_tPKs(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnt1_s16u10__SVBool_tPKs(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
  svint16_t test_svldnt1_s16(svbool_t pg, const int16_t *base)
@@ -47,16 +47,16 @@ svint16_t test_svldnt1_s16(svbool_t pg, const int16_t *base)
    return SVE_ACLE_FUNC(svldnt1,_s16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnt1_s32u10__SVBool_tPKi(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnt1_s32u10__SVBool_tPKi(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svint32_t test_svldnt1_s32(svbool_t pg, const int32_t *base)
@@ -64,16 +64,16 @@ svint32_t test_svldnt1_s32(svbool_t pg, const int32_t *base)
    return SVE_ACLE_FUNC(svldnt1,_s32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnt1_s64u10__SVBool_tPKl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnt1_s64u10__SVBool_tPKl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svint64_t test_svldnt1_s64(svbool_t pg, const int64_t *base)
@@ -81,14 +81,14 @@ svint64_t test_svldnt1_s64(svbool_t pg, const int64_t *base)
    return SVE_ACLE_FUNC(svldnt1,_s64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
-// CPP-CHECK-LABEL: @_Z15test_svldnt1_u8u10__SVBool_tPKh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svldnt1_u8u10__SVBool_tPKh(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
  //
  svuint8_t test_svldnt1_u8(svbool_t pg, const uint8_t *base)
@@ -96,16 +96,16 @@ svuint8_t test_svldnt1_u8(svbool_t pg, const uint8_t *base)
    return SVE_ACLE_FUNC(svldnt1,_u8,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnt1_u16u10__SVBool_tPKt(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnt1_u16u10__SVBool_tPKt(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
  //
  svuint16_t test_svldnt1_u16(svbool_t pg, const uint16_t *base)
@@ -113,16 +113,16 @@ svuint16_t test_svldnt1_u16(svbool_t pg, const uint16_t *base)
    return SVE_ACLE_FUNC(svldnt1,_u16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnt1_u32u10__SVBool_tPKj(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnt1_u32u10__SVBool_tPKj(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
  //
  svuint32_t test_svldnt1_u32(svbool_t pg, const uint32_t *base)
@@ -130,16 +130,16 @@ svuint32_t test_svldnt1_u32(svbool_t pg, const uint32_t *base)
    return SVE_ACLE_FUNC(svldnt1,_u32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnt1_u64u10__SVBool_tPKm(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnt1_u64u10__SVBool_tPKm(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
  //
  svuint64_t test_svldnt1_u64(svbool_t pg, const uint64_t *base)
@@ -147,16 +147,16 @@ svuint64_t test_svldnt1_u64(svbool_t pg, const uint64_t *base)
    return SVE_ACLE_FUNC(svldnt1,_u64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnt1_f16u10__SVBool_tPKDh(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnt1_f16u10__SVBool_tPKDh(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
  //
  svfloat16_t test_svldnt1_f16(svbool_t pg, const float16_t *base)
@@ -164,16 +164,16 @@ svfloat16_t test_svldnt1_f16(svbool_t pg, const float16_t *base)
    return SVE_ACLE_FUNC(svldnt1,_f16,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnt1_f32u10__SVBool_tPKf(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnt1_f32u10__SVBool_tPKf(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
  //
  svfloat32_t test_svldnt1_f32(svbool_t pg, const float32_t *base)
@@ -181,16 +181,16 @@ svfloat32_t test_svldnt1_f32(svbool_t pg, const float32_t *base)
    return SVE_ACLE_FUNC(svldnt1,_f32,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z16test_svldnt1_f64u10__SVBool_tPKd(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svldnt1_f64u10__SVBool_tPKd(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
  //
  svfloat64_t test_svldnt1_f64(svbool_t pg, const float64_t *base)
@@ -198,227 +198,205 @@ svfloat64_t test_svldnt1_f64(svbool_t pg, const float64_t *base)
    return SVE_ACLE_FUNC(svldnt1,_f64,,)(pg, base);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svldnt1_vnum_s8u10__SVBool_tPKal(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svldnt1_vnum_s8u10__SVBool_tPKal(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
  svint8_t test_svldnt1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_s8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_s16u10__SVBool_tPKsl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnt1_vnum_s16u10__SVBool_tPKsl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svint16_t test_svldnt1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_s16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_s32u10__SVBool_tPKil(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnt1_vnum_s32u10__SVBool_tPKil(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svint32_t test_svldnt1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_s32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_s64u10__SVBool_tPKll(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnt1_vnum_s64u10__SVBool_tPKll(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svint64_t test_svldnt1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_s64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
-// CPP-CHECK-LABEL: @_Z20test_svldnt1_vnum_u8u10__SVBool_tPKhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svldnt1_vnum_u8u10__SVBool_tPKhl(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
  //
  svuint8_t test_svldnt1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_u8,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_u16u10__SVBool_tPKtl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnt1_vnum_u16u10__SVBool_tPKtl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
  //
  svuint16_t test_svldnt1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_u16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_u32u10__SVBool_tPKjl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnt1_vnum_u32u10__SVBool_tPKjl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
  //
  svuint32_t test_svldnt1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_u32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_u64u10__SVBool_tPKml(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnt1_vnum_u64u10__SVBool_tPKml(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
  //
  svuint64_t test_svldnt1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_u64,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_f16u10__SVBool_tPKDhl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnt1_vnum_f16u10__SVBool_tPKDhl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
  //
  svfloat16_t test_svldnt1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_f16,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_f32u10__SVBool_tPKfl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnt1_vnum_f32u10__SVBool_tPKfl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
  //
  svfloat32_t test_svldnt1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
  {
    return SVE_ACLE_FUNC(svldnt1_vnum,_f32,,)(pg, base, vnum);
  }
  
-// CHECK-LABEL: @test_svldnt1_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svldnt1_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
-// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_f64u10__SVBool_tPKdl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svldnt1_vnum_f64u10__SVBool_tPKdl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
-// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
  //
  svfloat64_t test_svldnt1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
  {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c

index ec6f8b0..41fe9f2 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,14 +14,14 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svprfb(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 0)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z11test_svprfbu10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z11test_svprfbu10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 0)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 0)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb(svbool_t pg, const void *base)
@@ -29,14 +29,14 @@ void test_svprfb(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PLDL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_1(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_1(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 1)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 1)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svprfb_1u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svprfb_1u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 1)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 1)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_1(svbool_t pg, const void *base)
@@ -44,14 +44,14 @@ void test_svprfb_1(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PLDL1STRM);
  }
  
-// CHECK-LABEL: @test_svprfb_2(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_2(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 2)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svprfb_2u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svprfb_2u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 2)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_2(svbool_t pg, const void *base)
@@ -59,14 +59,14 @@ void test_svprfb_2(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PLDL2KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_3(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_3(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 3)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 3)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svprfb_3u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svprfb_3u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 3)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 3)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_3(svbool_t pg, const void *base)
@@ -74,14 +74,14 @@ void test_svprfb_3(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PLDL2STRM);
  }
  
-// CHECK-LABEL: @test_svprfb_4(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_4(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 4)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svprfb_4u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svprfb_4u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 4)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_4(svbool_t pg, const void *base)
@@ -89,14 +89,14 @@ void test_svprfb_4(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PLDL3KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_5(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_5(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 5)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 5)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svprfb_5u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svprfb_5u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 5)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 5)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_5(svbool_t pg, const void *base)
@@ -104,14 +104,14 @@ void test_svprfb_5(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PLDL3STRM);
  }
  
-// CHECK-LABEL: @test_svprfb_6(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_6(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 8)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svprfb_6u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svprfb_6u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 8)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_6(svbool_t pg, const void *base)
@@ -119,14 +119,14 @@ void test_svprfb_6(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PSTL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_7(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_7(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 9)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 9)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svprfb_7u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svprfb_7u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 9)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 9)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_7(svbool_t pg, const void *base)
@@ -134,14 +134,14 @@ void test_svprfb_7(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PSTL1STRM);
  }
  
-// CHECK-LABEL: @test_svprfb_8(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 10)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 10)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svprfb_8u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svprfb_8u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 10)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 10)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_8(svbool_t pg, const void *base)
@@ -149,14 +149,14 @@ void test_svprfb_8(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PSTL2KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_9(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_9(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 11)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 11)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svprfb_9u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svprfb_9u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 11)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 11)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_9(svbool_t pg, const void *base)
@@ -164,14 +164,14 @@ void test_svprfb_9(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PSTL2STRM);
  }
  
-// CHECK-LABEL: @test_svprfb_10(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_10(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 12)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svprfb_10u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svprfb_10u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 12)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_10(svbool_t pg, const void *base)
@@ -179,14 +179,14 @@ void test_svprfb_10(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PSTL3KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_11(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_11(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 13)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 13)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svprfb_11u10__SVBool_tPKv(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svprfb_11u10__SVBool_tPKv(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]], i32 13)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 13)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_11(svbool_t pg, const void *base)
@@ -194,18 +194,16 @@ void test_svprfb_11(svbool_t pg, const void *base)
    return svprfb(pg, base, SV_PSTL3STRM);
  }
  
-// CHECK-LABEL: @test_svprfb_vnum(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_vnum(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]], i32 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]], i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svprfb_vnumu10__SVBool_tPKvl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svprfb_vnumu10__SVBool_tPKvl(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]], i32 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]], i32 0)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_vnum(svbool_t pg, const void *base, int64_t vnum)
@@ -213,13 +211,13 @@ void test_svprfb_vnum(svbool_t pg, const void *base, int64_t vnum)
    return svprfb_vnum(pg, base, vnum, SV_PLDL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_gather_u32base(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_gather_u32base(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0, i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z26test_svprfb_gather_u32baseu10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z26test_svprfb_gather_u32baseu10__SVBool_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0, i32 0)
@@ -230,13 +228,13 @@ void test_svprfb_gather_u32base(svbool_t pg, svuint32_t bases)
    return SVE_ACLE_FUNC(svprfb_gather,_u32base,,)(pg, bases, SV_PLDL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_gather_u64base(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_gather_u64base(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0, i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z26test_svprfb_gather_u64baseu10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z26test_svprfb_gather_u64baseu10__SVBool_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0, i32 0)
@@ -247,16 +245,16 @@ void test_svprfb_gather_u64base(svbool_t pg, svuint64_t bases)
    return SVE_ACLE_FUNC(svprfb_gather,_u64base,,)(pg, bases, SV_PLDL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_gather_s32offset(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_gather_s32offset(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]], i32 0)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]], i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z28test_svprfb_gather_s32offsetu10__SVBool_tPKvu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z28test_svprfb_gather_s32offsetu10__SVBool_tPKvu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]], i32 0)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]], i32 0)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_gather_s32offset(svbool_t pg, const void *base, svint32_t offsets)
@@ -264,16 +262,16 @@ void test_svprfb_gather_s32offset(svbool_t pg, const void *base, svint32_t offse
    return SVE_ACLE_FUNC(svprfb_gather_,s32,offset,)(pg, base, offsets, SV_PLDL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_gather_s64offset(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_gather_s64offset(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]], i32 0)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]], i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z28test_svprfb_gather_s64offsetu10__SVBool_tPKvu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z28test_svprfb_gather_s64offsetu10__SVBool_tPKvu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]], i32 0)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]], i32 0)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_gather_s64offset(svbool_t pg, const void *base, svint64_t offsets)
@@ -281,16 +279,16 @@ void test_svprfb_gather_s64offset(svbool_t pg, const void *base, svint64_t offse
    return SVE_ACLE_FUNC(svprfb_gather_,s64,offset,)(pg, base, offsets, SV_PLDL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_gather_u32offset(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_gather_u32offset(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]], i32 0)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]], i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z28test_svprfb_gather_u32offsetu10__SVBool_tPKvu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z28test_svprfb_gather_u32offsetu10__SVBool_tPKvu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]], i32 0)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]], i32 0)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_gather_u32offset(svbool_t pg, const void *base, svuint32_t offsets)
@@ -298,16 +296,16 @@ void test_svprfb_gather_u32offset(svbool_t pg, const void *base, svuint32_t offs
    return SVE_ACLE_FUNC(svprfb_gather_,u32,offset,)(pg, base, offsets, SV_PLDL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_gather_u64offset(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_gather_u64offset(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]], i32 0)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]], i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z28test_svprfb_gather_u64offsetu10__SVBool_tPKvu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z28test_svprfb_gather_u64offsetu10__SVBool_tPKvu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]], i32 0)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]], i32 0)
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svprfb_gather_u64offset(svbool_t pg, const void *base, svuint64_t offsets)
@@ -315,13 +313,13 @@ void test_svprfb_gather_u64offset(svbool_t pg, const void *base, svuint64_t offs
    return SVE_ACLE_FUNC(svprfb_gather_,u64,offset,)(pg, base, offsets, SV_PLDL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_gather_u32base_offset(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_gather_u32base_offset(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]], i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z33test_svprfb_gather_u32base_offsetu10__SVBool_tu12__SVUint32_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svprfb_gather_u32base_offsetu10__SVBool_tu12__SVUint32_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]], i32 0)
@@ -333,13 +331,13 @@ void test_svprfb_gather_u32base_offset(svbool_t pg, svuint32_t bases, int64_t of
    return SVE_ACLE_FUNC(svprfb_gather,_u32base,_offset,)(pg, bases, offset, SV_PLDL1KEEP);
  }
  
-// CHECK-LABEL: @test_svprfb_gather_u64base_offset(
+// CHECK-LABEL: define {{[^@]+}}@test_svprfb_gather_u64base_offset(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]], i32 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z33test_svprfb_gather_u64base_offsetu10__SVBool_tu12__SVUint64_tl(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z33test_svprfb_gather_u64base_offsetu10__SVBool_tu12__SVUint64_tl(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]], i32 0)
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c

index 9227724..f9ddbf9 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,18 +15,16 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst1_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x bfloat>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0(<vscale x 8 x bfloat> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z15test_svst1_bf16u10__SVBool_tPu6__bf16u14__SVBFloat16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svst1_bf16u10__SVBool_tPu6__bf16u14__SVBFloat16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x bfloat>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0(<vscale x 8 x bfloat> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
@@ -34,22 +32,18 @@ void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
    return SVE_ACLE_FUNC(svst1,_bf16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast bfloat* [[TMP2]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x bfloat>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0(<vscale x 8 x bfloat> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z20test_svst1_vnum_bf16u10__SVBool_tPu6__bf16lu14__SVBFloat16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svst1_vnum_bf16u10__SVBool_tPu6__bf16lu14__SVBFloat16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast bfloat* [[TMP2]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x bfloat>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0(<vscale x 8 x bfloat> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data)
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c

index e7006f7..83e68a7 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,16 +14,14 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst1_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svst1_s8u10__SVBool_tPau10__SVInt8_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svst1_s8u10__SVBool_tPau10__SVInt8_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_s8(svbool_t pg, int8_t *base, svint8_t data)
@@ -31,18 +29,16 @@ void test_svst1_s8(svbool_t pg, int8_t *base, svint8_t data)
    return SVE_ACLE_FUNC(svst1,_s8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst1_s16u10__SVBool_tPsu11__SVInt16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst1_s16u10__SVBool_tPsu11__SVInt16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_s16(svbool_t pg, int16_t *base, svint16_t data)
@@ -50,18 +46,16 @@ void test_svst1_s16(svbool_t pg, int16_t *base, svint16_t data)
    return SVE_ACLE_FUNC(svst1,_s16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst1_s32u10__SVBool_tPiu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst1_s32u10__SVBool_tPiu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_s32(svbool_t pg, int32_t *base, svint32_t data)
@@ -69,18 +63,16 @@ void test_svst1_s32(svbool_t pg, int32_t *base, svint32_t data)
    return SVE_ACLE_FUNC(svst1,_s32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst1_s64u10__SVBool_tPlu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst1_s64u10__SVBool_tPlu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_s64(svbool_t pg, int64_t *base, svint64_t data)
@@ -88,16 +80,14 @@ void test_svst1_s64(svbool_t pg, int64_t *base, svint64_t data)
    return SVE_ACLE_FUNC(svst1,_s64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svst1_u8u10__SVBool_tPhu11__SVUint8_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svst1_u8u10__SVBool_tPhu11__SVUint8_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
@@ -105,18 +95,16 @@ void test_svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
    return SVE_ACLE_FUNC(svst1,_u8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst1_u16u10__SVBool_tPtu12__SVUint16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst1_u16u10__SVBool_tPtu12__SVUint16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
@@ -124,18 +112,16 @@ void test_svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
    return SVE_ACLE_FUNC(svst1,_u16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst1_u32u10__SVBool_tPju12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst1_u32u10__SVBool_tPju12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
@@ -143,18 +129,16 @@ void test_svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
    return SVE_ACLE_FUNC(svst1,_u32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst1_u64u10__SVBool_tPmu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst1_u64u10__SVBool_tPmu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
@@ -162,18 +146,16 @@ void test_svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
    return SVE_ACLE_FUNC(svst1,_u64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0(<vscale x 8 x half> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst1_f16u10__SVBool_tPDhu13__SVFloat16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst1_f16u10__SVBool_tPDhu13__SVFloat16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0(<vscale x 8 x half> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
@@ -181,18 +163,16 @@ void test_svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
    return SVE_ACLE_FUNC(svst1,_f16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst1_f32u10__SVBool_tPfu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst1_f32u10__SVBool_tPfu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
@@ -200,18 +180,16 @@ void test_svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
    return SVE_ACLE_FUNC(svst1,_f32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst1_f64u10__SVBool_tPdu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst1_f64u10__SVBool_tPdu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
@@ -219,20 +197,16 @@ void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
    return SVE_ACLE_FUNC(svst1,_f64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z18test_svst1_vnum_s8u10__SVBool_tPalu10__SVInt8_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst1_vnum_s8u10__SVBool_tPalu10__SVInt8_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
@@ -240,22 +214,18 @@ void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
    return SVE_ACLE_FUNC(svst1_vnum,_s8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_s16u10__SVBool_tPslu11__SVInt16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst1_vnum_s16u10__SVBool_tPslu11__SVInt16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data)
@@ -263,22 +233,18 @@ void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t dat
    return SVE_ACLE_FUNC(svst1_vnum,_s16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_s32u10__SVBool_tPilu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst1_vnum_s32u10__SVBool_tPilu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data)
@@ -286,22 +252,18 @@ void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t dat
    return SVE_ACLE_FUNC(svst1_vnum,_s32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_s64u10__SVBool_tPllu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst1_vnum_s64u10__SVBool_tPllu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data)
@@ -309,20 +271,16 @@ void test_svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t dat
    return SVE_ACLE_FUNC(svst1_vnum,_s64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z18test_svst1_vnum_u8u10__SVBool_tPhlu11__SVUint8_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst1_vnum_u8u10__SVBool_tPhlu11__SVUint8_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data)
@@ -330,22 +288,18 @@ void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data
    return SVE_ACLE_FUNC(svst1_vnum,_u8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_u16u10__SVBool_tPtlu12__SVUint16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst1_vnum_u16u10__SVBool_tPtlu12__SVUint16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data)
@@ -353,22 +307,18 @@ void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t d
    return SVE_ACLE_FUNC(svst1_vnum,_u16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_u32u10__SVBool_tPjlu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst1_vnum_u32u10__SVBool_tPjlu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data)
@@ -376,22 +326,18 @@ void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t d
    return SVE_ACLE_FUNC(svst1_vnum,_u32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_u64u10__SVBool_tPmlu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst1_vnum_u64u10__SVBool_tPmlu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data)
@@ -399,22 +345,18 @@ void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t d
    return SVE_ACLE_FUNC(svst1_vnum,_u64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0(<vscale x 8 x half> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_f16u10__SVBool_tPDhlu13__SVFloat16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst1_vnum_f16u10__SVBool_tPDhlu13__SVFloat16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0(<vscale x 8 x half> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data)
@@ -422,22 +364,18 @@ void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t
    return SVE_ACLE_FUNC(svst1_vnum,_f16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_f32u10__SVBool_tPflu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst1_vnum_f32u10__SVBool_tPflu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data)
@@ -445,22 +383,18 @@ void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t
    return SVE_ACLE_FUNC(svst1_vnum,_f32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_f64u10__SVBool_tPdlu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst1_vnum_f64u10__SVBool_tPdlu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[DATA:%.*]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data)
@@ -468,13 +402,13 @@ void test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t
    return SVE_ACLE_FUNC(svst1_vnum,_f64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z30test_svst1_scatter_u32base_s32u10__SVBool_tu12__SVUint32_tu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svst1_scatter_u32base_s32u10__SVBool_tu12__SVUint32_tu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -485,13 +419,13 @@ void test_svst1_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t dat
    return SVE_ACLE_FUNC(svst1_scatter,_u32base,,_s32)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z30test_svst1_scatter_u64base_s64u10__SVBool_tu12__SVUint64_tu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svst1_scatter_u64base_s64u10__SVBool_tu12__SVUint64_tu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -502,13 +436,13 @@ void test_svst1_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t dat
    return SVE_ACLE_FUNC(svst1_scatter,_u64base,,_s64)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z30test_svst1_scatter_u32base_u32u10__SVBool_tu12__SVUint32_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svst1_scatter_u32base_u32u10__SVBool_tu12__SVUint32_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -519,13 +453,13 @@ void test_svst1_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t da
    return SVE_ACLE_FUNC(svst1_scatter,_u32base,,_u32)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z30test_svst1_scatter_u64base_u64u10__SVBool_tu12__SVUint64_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svst1_scatter_u64base_u64u10__SVBool_tu12__SVUint64_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -536,13 +470,13 @@ void test_svst1_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t da
    return SVE_ACLE_FUNC(svst1_scatter,_u64base,,_u64)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32base_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32base_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z30test_svst1_scatter_u32base_f32u10__SVBool_tu12__SVUint32_tu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svst1_scatter_u32base_f32u10__SVBool_tu12__SVUint32_tu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 0)
@@ -553,13 +487,13 @@ void test_svst1_scatter_u32base_f32(svbool_t pg, svuint32_t bases, svfloat32_t d
    return SVE_ACLE_FUNC(svst1_scatter,_u32base,,_f32)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64base_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64base_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z30test_svst1_scatter_u64base_f64u10__SVBool_tu12__SVUint64_tu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z30test_svst1_scatter_u64base_f64u10__SVBool_tu12__SVUint64_tu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 0)
@@ -570,16 +504,16 @@ void test_svst1_scatter_u64base_f64(svbool_t pg, svuint64_t bases, svfloat64_t d
    return SVE_ACLE_FUNC(svst1_scatter,_u64base,,_f64)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_s32offset_s32u10__SVBool_tPiu11__SVInt32_tu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_s32offset_s32u10__SVBool_tPiu11__SVInt32_tu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s32offset_s32(svbool_t pg, int32_t *base, svint32_t offsets, svint32_t data)
@@ -587,16 +521,16 @@ void test_svst1_scatter_s32offset_s32(svbool_t pg, int32_t *base, svint32_t offs
    return SVE_ACLE_FUNC(svst1_scatter_,s32,offset,_s32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_s64offset_s64u10__SVBool_tPlu11__SVInt64_tu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_s64offset_s64u10__SVBool_tPlu11__SVInt64_tu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s64offset_s64(svbool_t pg, int64_t *base, svint64_t offsets, svint64_t data)
@@ -604,16 +538,16 @@ void test_svst1_scatter_s64offset_s64(svbool_t pg, int64_t *base, svint64_t offs
    return SVE_ACLE_FUNC(svst1_scatter_,s64,offset,_s64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_s32offset_u32u10__SVBool_tPju11__SVInt32_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_s32offset_u32u10__SVBool_tPju11__SVInt32_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s32offset_u32(svbool_t pg, uint32_t *base, svint32_t offsets, svuint32_t data)
@@ -621,16 +555,16 @@ void test_svst1_scatter_s32offset_u32(svbool_t pg, uint32_t *base, svint32_t off
    return SVE_ACLE_FUNC(svst1_scatter_,s32,offset,_u32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_s64offset_u64u10__SVBool_tPmu11__SVInt64_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_s64offset_u64u10__SVBool_tPmu11__SVInt64_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s64offset_u64(svbool_t pg, uint64_t *base, svint64_t offsets, svuint64_t data)
@@ -638,16 +572,16 @@ void test_svst1_scatter_s64offset_u64(svbool_t pg, uint64_t *base, svint64_t off
    return SVE_ACLE_FUNC(svst1_scatter_,s64,offset,_u64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s32offset_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s32offset_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_s32offset_f32u10__SVBool_tPfu11__SVInt32_tu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_s32offset_f32u10__SVBool_tPfu11__SVInt32_tu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s32offset_f32(svbool_t pg, float32_t *base, svint32_t offsets, svfloat32_t data)
@@ -655,16 +589,16 @@ void test_svst1_scatter_s32offset_f32(svbool_t pg, float32_t *base, svint32_t of
    return SVE_ACLE_FUNC(svst1_scatter_,s32,offset,_f32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s64offset_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s64offset_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_s64offset_f64u10__SVBool_tPdu11__SVInt64_tu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_s64offset_f64u10__SVBool_tPdu11__SVInt64_tu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t offsets, svfloat64_t data)
@@ -672,16 +606,16 @@ void test_svst1_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t of
    return SVE_ACLE_FUNC(svst1_scatter_,s64,offset,_f64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_u32offset_s32u10__SVBool_tPiu12__SVUint32_tu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_u32offset_s32u10__SVBool_tPiu12__SVUint32_tu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u32offset_s32(svbool_t pg, int32_t *base, svuint32_t offsets, svint32_t data)
@@ -689,16 +623,16 @@ void test_svst1_scatter_u32offset_s32(svbool_t pg, int32_t *base, svuint32_t off
    return SVE_ACLE_FUNC(svst1_scatter_,u32,offset,_s32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_u64offset_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_u64offset_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u64offset_s64(svbool_t pg, int64_t *base, svuint64_t offsets, svint64_t data)
@@ -706,16 +640,16 @@ void test_svst1_scatter_u64offset_s64(svbool_t pg, int64_t *base, svuint64_t off
    return SVE_ACLE_FUNC(svst1_scatter_,u64,offset,_s64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_u32offset_u32u10__SVBool_tPju12__SVUint32_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_u32offset_u32u10__SVBool_tPju12__SVUint32_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u32offset_u32(svbool_t pg, uint32_t *base, svuint32_t offsets, svuint32_t data)
@@ -723,16 +657,16 @@ void test_svst1_scatter_u32offset_u32(svbool_t pg, uint32_t *base, svuint32_t of
    return SVE_ACLE_FUNC(svst1_scatter_,u32,offset,_u32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_u64offset_u64u10__SVBool_tPmu12__SVUint64_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_u64offset_u64u10__SVBool_tPmu12__SVUint64_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u64offset_u64(svbool_t pg, uint64_t *base, svuint64_t offsets, svuint64_t data)
@@ -740,16 +674,16 @@ void test_svst1_scatter_u64offset_u64(svbool_t pg, uint64_t *base, svuint64_t of
    return SVE_ACLE_FUNC(svst1_scatter_,u64,offset,_u64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32offset_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32offset_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_u32offset_f32u10__SVBool_tPfu12__SVUint32_tu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_u32offset_f32u10__SVBool_tPfu12__SVUint32_tu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u32offset_f32(svbool_t pg, float32_t *base, svuint32_t offsets, svfloat32_t data)
@@ -757,16 +691,16 @@ void test_svst1_scatter_u32offset_f32(svbool_t pg, float32_t *base, svuint32_t o
    return SVE_ACLE_FUNC(svst1_scatter_,u32,offset,_f32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64offset_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64offset_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z32test_svst1_scatter_u64offset_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z32test_svst1_scatter_u64offset_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t offsets, svfloat64_t data)
@@ -774,13 +708,13 @@ void test_svst1_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t o
    return SVE_ACLE_FUNC(svst1_scatter_,u64,offset,_f64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z37test_svst1_scatter_u32base_offset_s32u10__SVBool_tu12__SVUint32_tlu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svst1_scatter_u32base_offset_s32u10__SVBool_tu12__SVUint32_tlu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -791,13 +725,13 @@ void test_svst1_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_
    return SVE_ACLE_FUNC(svst1_scatter,_u32base,_offset,_s32)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z37test_svst1_scatter_u64base_offset_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svst1_scatter_u64base_offset_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -808,13 +742,13 @@ void test_svst1_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_
    return SVE_ACLE_FUNC(svst1_scatter,_u64base,_offset,_s64)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z37test_svst1_scatter_u32base_offset_u32u10__SVBool_tu12__SVUint32_tlu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svst1_scatter_u32base_offset_u32u10__SVBool_tu12__SVUint32_tlu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -825,13 +759,13 @@ void test_svst1_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_
    return SVE_ACLE_FUNC(svst1_scatter,_u32base,_offset,_u32)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z37test_svst1_scatter_u64base_offset_u64u10__SVBool_tu12__SVUint64_tlu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svst1_scatter_u64base_offset_u64u10__SVBool_tu12__SVUint64_tlu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -842,13 +776,13 @@ void test_svst1_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_
    return SVE_ACLE_FUNC(svst1_scatter,_u64base,_offset,_u64)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32base_offset_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32base_offset_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z37test_svst1_scatter_u32base_offset_f32u10__SVBool_tu12__SVUint32_tlu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svst1_scatter_u32base_offset_f32u10__SVBool_tu12__SVUint32_tlu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -859,13 +793,13 @@ void test_svst1_scatter_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_
    return SVE_ACLE_FUNC(svst1_scatter,_u32base,_offset,_f32)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64base_offset_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64base_offset_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z37test_svst1_scatter_u64base_offset_f64u10__SVBool_tu12__SVUint64_tlu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z37test_svst1_scatter_u64base_offset_f64u10__SVBool_tu12__SVUint64_tlu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
@@ -876,16 +810,16 @@ void test_svst1_scatter_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_
    return SVE_ACLE_FUNC(svst1_scatter,_u64base,_offset,_f64)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_s32index_s32u10__SVBool_tPiu11__SVInt32_tu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_s32index_s32u10__SVBool_tPiu11__SVInt32_tu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s32index_s32(svbool_t pg, int32_t *base, svint32_t indices, svint32_t data)
@@ -893,16 +827,16 @@ void test_svst1_scatter_s32index_s32(svbool_t pg, int32_t *base, svint32_t indic
    return SVE_ACLE_FUNC(svst1_scatter_,s32,index,_s32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_s64index_s64u10__SVBool_tPlu11__SVInt64_tu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_s64index_s64u10__SVBool_tPlu11__SVInt64_tu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s64index_s64(svbool_t pg, int64_t *base, svint64_t indices, svint64_t data)
@@ -910,16 +844,16 @@ void test_svst1_scatter_s64index_s64(svbool_t pg, int64_t *base, svint64_t indic
    return SVE_ACLE_FUNC(svst1_scatter_,s64,index,_s64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_s32index_u32u10__SVBool_tPju11__SVInt32_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_s32index_u32u10__SVBool_tPju11__SVInt32_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s32index_u32(svbool_t pg, uint32_t *base, svint32_t indices, svuint32_t data)
@@ -927,16 +861,16 @@ void test_svst1_scatter_s32index_u32(svbool_t pg, uint32_t *base, svint32_t indi
    return SVE_ACLE_FUNC(svst1_scatter_,s32,index,_u32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_s64index_u64u10__SVBool_tPmu11__SVInt64_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_s64index_u64u10__SVBool_tPmu11__SVInt64_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s64index_u64(svbool_t pg, uint64_t *base, svint64_t indices, svuint64_t data)
@@ -944,16 +878,16 @@ void test_svst1_scatter_s64index_u64(svbool_t pg, uint64_t *base, svint64_t indi
    return SVE_ACLE_FUNC(svst1_scatter_,s64,index,_u64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s32index_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s32index_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_s32index_f32u10__SVBool_tPfu11__SVInt32_tu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_s32index_f32u10__SVBool_tPfu11__SVInt32_tu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s32index_f32(svbool_t pg, float32_t *base, svint32_t indices, svfloat32_t data)
@@ -961,16 +895,16 @@ void test_svst1_scatter_s32index_f32(svbool_t pg, float32_t *base, svint32_t ind
    return SVE_ACLE_FUNC(svst1_scatter_,s32,index,_f32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_s64index_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_s64index_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_s64index_f64u10__SVBool_tPdu11__SVInt64_tu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_s64index_f64u10__SVBool_tPdu11__SVInt64_tu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_s64index_f64(svbool_t pg, float64_t *base, svint64_t indices, svfloat64_t data)
@@ -978,16 +912,16 @@ void test_svst1_scatter_s64index_f64(svbool_t pg, float64_t *base, svint64_t ind
    return SVE_ACLE_FUNC(svst1_scatter_,s64,index,_f64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_u32index_s32u10__SVBool_tPiu12__SVUint32_tu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_u32index_s32u10__SVBool_tPiu12__SVUint32_tu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u32index_s32(svbool_t pg, int32_t *base, svuint32_t indices, svint32_t data)
@@ -995,16 +929,16 @@ void test_svst1_scatter_u32index_s32(svbool_t pg, int32_t *base, svuint32_t indi
    return SVE_ACLE_FUNC(svst1_scatter_,u32,index,_s32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_u64index_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_u64index_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u64index_s64(svbool_t pg, int64_t *base, svuint64_t indices, svint64_t data)
@@ -1012,16 +946,16 @@ void test_svst1_scatter_u64index_s64(svbool_t pg, int64_t *base, svuint64_t indi
    return SVE_ACLE_FUNC(svst1_scatter_,u64,index,_s64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_u32index_u32u10__SVBool_tPju12__SVUint32_tu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_u32index_u32u10__SVBool_tPju12__SVUint32_tu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u32index_u32(svbool_t pg, uint32_t *base, svuint32_t indices, svuint32_t data)
@@ -1029,16 +963,16 @@ void test_svst1_scatter_u32index_u32(svbool_t pg, uint32_t *base, svuint32_t ind
    return SVE_ACLE_FUNC(svst1_scatter_,u32,index,_u32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_u64index_u64u10__SVBool_tPmu12__SVUint64_tu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_u64index_u64u10__SVBool_tPmu12__SVUint64_tu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u64index_u64(svbool_t pg, uint64_t *base, svuint64_t indices, svuint64_t data)
@@ -1046,16 +980,16 @@ void test_svst1_scatter_u64index_u64(svbool_t pg, uint64_t *base, svuint64_t ind
    return SVE_ACLE_FUNC(svst1_scatter_,u64,index,_u64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32index_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32index_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_u32index_f32u10__SVBool_tPfu12__SVUint32_tu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_u32index_f32u10__SVBool_tPfu12__SVUint32_tu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u32index_f32(svbool_t pg, float32_t *base, svuint32_t indices, svfloat32_t data)
@@ -1063,16 +997,16 @@ void test_svst1_scatter_u32index_f32(svbool_t pg, float32_t *base, svuint32_t in
    return SVE_ACLE_FUNC(svst1_scatter_,u32,index,_f32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64index_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64index_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z31test_svst1_scatter_u64index_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z31test_svst1_scatter_u64index_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst1_scatter_u64index_f64(svbool_t pg, float64_t *base, svuint64_t indices, svfloat64_t data)
@@ -1080,14 +1014,14 @@ void test_svst1_scatter_u64index_f64(svbool_t pg, float64_t *base, svuint64_t in
    return SVE_ACLE_FUNC(svst1_scatter_,u64,index,_f64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32base_index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32base_index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z36test_svst1_scatter_u32base_index_s32u10__SVBool_tu12__SVUint32_tlu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svst1_scatter_u32base_index_s32u10__SVBool_tu12__SVUint32_tlu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -1099,14 +1033,14 @@ void test_svst1_scatter_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t
    return SVE_ACLE_FUNC(svst1_scatter,_u32base,_index,_s32)(pg, bases, index, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z36test_svst1_scatter_u64base_index_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svst1_scatter_u64base_index_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
@@ -1118,14 +1052,14 @@ void test_svst1_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t
    return SVE_ACLE_FUNC(svst1_scatter,_u64base,_index,_s64)(pg, bases, index, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32base_index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32base_index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z36test_svst1_scatter_u32base_index_u32u10__SVBool_tu12__SVUint32_tlu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svst1_scatter_u32base_index_u32u10__SVBool_tu12__SVUint32_tlu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -1137,14 +1071,14 @@ void test_svst1_scatter_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t
    return SVE_ACLE_FUNC(svst1_scatter,_u32base,_index,_u32)(pg, bases, index, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z36test_svst1_scatter_u64base_index_u64u10__SVBool_tu12__SVUint64_tlu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svst1_scatter_u64base_index_u64u10__SVBool_tu12__SVUint64_tlu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
@@ -1156,14 +1090,14 @@ void test_svst1_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t
    return SVE_ACLE_FUNC(svst1_scatter,_u64base,_index,_u64)(pg, bases, index, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u32base_index_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u32base_index_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z36test_svst1_scatter_u32base_index_f32u10__SVBool_tu12__SVUint32_tlu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svst1_scatter_u32base_index_f32u10__SVBool_tu12__SVUint32_tlu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 2
@@ -1175,14 +1109,14 @@ void test_svst1_scatter_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t
    return SVE_ACLE_FUNC(svst1_scatter,_u32base,_index,_f32)(pg, bases, index, data);
  }
  
-// CHECK-LABEL: @test_svst1_scatter_u64base_index_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1_scatter_u64base_index_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
  // CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z36test_svst1_scatter_u64base_index_f64u10__SVBool_tu12__SVUint64_tlu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z36test_svst1_scatter_u64base_index_f64u10__SVBool_tu12__SVUint64_tlu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX:%.*]], 3
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c

index 46463ab..0f47658 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c
@@ -1,7 +1,7 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -11,12 +11,11 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst1b_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 8 x i16> [[DATA:%.*]] to <vscale x 8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> [[TMP1]], <vscale x 8 x i8>* [[TMP2]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_s16(svbool_t pg, int8_t *base, svint16_t data)
@@ -24,12 +23,11 @@ void test_svst1b_s16(svbool_t pg, int8_t *base, svint16_t data)
    return SVE_ACLE_FUNC(svst1b,_s16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1b_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> [[TMP1]], <vscale x 4 x i8>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_s32(svbool_t pg, int8_t *base, svint32_t data)
@@ -37,12 +35,11 @@ void test_svst1b_s32(svbool_t pg, int8_t *base, svint32_t data)
    return SVE_ACLE_FUNC(svst1b,_s32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1b_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> [[TMP1]], <vscale x 2 x i8>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_s64(svbool_t pg, int8_t *base, svint64_t data)
@@ -50,12 +47,11 @@ void test_svst1b_s64(svbool_t pg, int8_t *base, svint64_t data)
    return SVE_ACLE_FUNC(svst1b,_s64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1b_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 8 x i16> [[DATA:%.*]] to <vscale x 8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> [[TMP1]], <vscale x 8 x i8>* [[TMP2]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data)
@@ -63,12 +59,11 @@ void test_svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data)
    return SVE_ACLE_FUNC(svst1b,_u16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1b_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> [[TMP1]], <vscale x 4 x i8>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data)
@@ -76,12 +71,11 @@ void test_svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data)
    return SVE_ACLE_FUNC(svst1b,_u32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1b_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> [[TMP1]], <vscale x 2 x i8>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data)
@@ -89,14 +83,12 @@ void test_svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data)
    return SVE_ACLE_FUNC(svst1b,_u64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1b_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 8 x i16> [[DATA:%.*]] to <vscale x 8 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i8>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t data)
@@ -104,14 +96,12 @@ void test_svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t dat
    return SVE_ACLE_FUNC(svst1b_vnum,_s16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1b_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> [[TMP2]], <vscale x 4 x i8>* [[TMP4]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t data)
@@ -119,14 +109,12 @@ void test_svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t dat
    return SVE_ACLE_FUNC(svst1b_vnum,_s32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1b_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> [[TMP2]], <vscale x 2 x i8>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t data)
@@ -134,14 +122,12 @@ void test_svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t dat
    return SVE_ACLE_FUNC(svst1b_vnum,_s64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1b_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 8 x i16> [[DATA:%.*]] to <vscale x 8 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 8 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i8>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t data)
@@ -149,14 +135,12 @@ void test_svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t d
    return SVE_ACLE_FUNC(svst1b_vnum,_u16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1b_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 4 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> [[TMP2]], <vscale x 4 x i8>* [[TMP4]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t data)
@@ -164,14 +148,12 @@ void test_svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t d
    return SVE_ACLE_FUNC(svst1b_vnum,_u32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1b_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 2 x i8>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> [[TMP2]], <vscale x 2 x i8>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_vnum_u64(svbool_t pg, uint8_t *base, int64_t vnum, svuint64_t data)
@@ -179,7 +161,7 @@ void test_svst1b_vnum_u64(svbool_t pg, uint8_t *base, int64_t vnum, svuint64_t d
    return SVE_ACLE_FUNC(svst1b_vnum,_u64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -191,7 +173,7 @@ void test_svst1b_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t da
    return SVE_ACLE_FUNC(svst1b_scatter,_u32base,,_s32)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -203,7 +185,7 @@ void test_svst1b_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t da
    return SVE_ACLE_FUNC(svst1b_scatter,_u64base,,_s64)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -215,7 +197,7 @@ void test_svst1b_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t d
    return SVE_ACLE_FUNC(svst1b_scatter,_u32base,,_u32)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -227,11 +209,11 @@ void test_svst1b_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t d
    return SVE_ACLE_FUNC(svst1b_scatter,_u64base,,_u64)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_scatter_s32offset_s32(svbool_t pg, int8_t *base, svint32_t offsets, svint32_t data)
@@ -239,11 +221,11 @@ void test_svst1b_scatter_s32offset_s32(svbool_t pg, int8_t *base, svint32_t offs
    return SVE_ACLE_FUNC(svst1b_scatter_,s32,offset,_s32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_scatter_s64offset_s64(svbool_t pg, int8_t *base, svint64_t offsets, svint64_t data)
@@ -251,11 +233,11 @@ void test_svst1b_scatter_s64offset_s64(svbool_t pg, int8_t *base, svint64_t offs
    return SVE_ACLE_FUNC(svst1b_scatter_,s64,offset,_s64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_scatter_s32offset_u32(svbool_t pg, uint8_t *base, svint32_t offsets, svuint32_t data)
@@ -263,11 +245,11 @@ void test_svst1b_scatter_s32offset_u32(svbool_t pg, uint8_t *base, svint32_t off
    return SVE_ACLE_FUNC(svst1b_scatter_,s32,offset,_u32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_scatter_s64offset_u64(svbool_t pg, uint8_t *base, svint64_t offsets, svuint64_t data)
@@ -275,11 +257,11 @@ void test_svst1b_scatter_s64offset_u64(svbool_t pg, uint8_t *base, svint64_t off
    return SVE_ACLE_FUNC(svst1b_scatter_,s64,offset,_u64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_scatter_u32offset_s32(svbool_t pg, int8_t *base, svuint32_t offsets, svint32_t data)
@@ -287,11 +269,11 @@ void test_svst1b_scatter_u32offset_s32(svbool_t pg, int8_t *base, svuint32_t off
    return SVE_ACLE_FUNC(svst1b_scatter_,u32,offset,_s32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_scatter_u64offset_s64(svbool_t pg, int8_t *base, svuint64_t offsets, svint64_t data)
@@ -299,11 +281,11 @@ void test_svst1b_scatter_u64offset_s64(svbool_t pg, int8_t *base, svuint64_t off
    return SVE_ACLE_FUNC(svst1b_scatter_,u64,offset,_s64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i8* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_scatter_u32offset_u32(svbool_t pg, uint8_t *base, svuint32_t offsets, svuint32_t data)
@@ -311,11 +293,11 @@ void test_svst1b_scatter_u32offset_u32(svbool_t pg, uint8_t *base, svuint32_t of
    return SVE_ACLE_FUNC(svst1b_scatter_,u32,offset,_u32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i8* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1b_scatter_u64offset_u64(svbool_t pg, uint8_t *base, svuint64_t offsets, svuint64_t data)
@@ -323,7 +305,7 @@ void test_svst1b_scatter_u64offset_u64(svbool_t pg, uint8_t *base, svuint64_t of
    return SVE_ACLE_FUNC(svst1b_scatter_,u64,offset,_u64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -335,7 +317,7 @@ void test_svst1b_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64
    return SVE_ACLE_FUNC(svst1b_scatter,_u32base,_offset,_s32)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -347,7 +329,7 @@ void test_svst1b_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64
    return SVE_ACLE_FUNC(svst1b_scatter,_u64base,_offset,_s64)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -359,7 +341,7 @@ void test_svst1b_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64
    return SVE_ACLE_FUNC(svst1b_scatter,_u32base,_offset,_u32)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1b_scatter_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1b_scatter_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c

index 8f2c86b..23a9a05 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c
@@ -1,7 +1,7 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -11,12 +11,11 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst1h_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> [[TMP1]], <vscale x 4 x i16>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0(<vscale x 4 x i16> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_s32(svbool_t pg, int16_t *base, svint32_t data)
@@ -24,12 +23,11 @@ void test_svst1h_s32(svbool_t pg, int16_t *base, svint32_t data)
    return SVE_ACLE_FUNC(svst1h,_s32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1h_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> [[TMP1]], <vscale x 2 x i16>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_s64(svbool_t pg, int16_t *base, svint64_t data)
@@ -37,12 +35,11 @@ void test_svst1h_s64(svbool_t pg, int16_t *base, svint64_t data)
    return SVE_ACLE_FUNC(svst1h,_s64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1h_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> [[TMP1]], <vscale x 4 x i16>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0(<vscale x 4 x i16> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_u32(svbool_t pg, uint16_t *base, svuint32_t data)
@@ -50,12 +47,11 @@ void test_svst1h_u32(svbool_t pg, uint16_t *base, svuint32_t data)
    return SVE_ACLE_FUNC(svst1h,_u32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1h_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> [[TMP1]], <vscale x 2 x i16>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data)
@@ -63,14 +59,12 @@ void test_svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data)
    return SVE_ACLE_FUNC(svst1h,_u64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1h_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> [[TMP2]], <vscale x 4 x i16>* [[TMP4]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0(<vscale x 4 x i16> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t data)
@@ -78,14 +72,12 @@ void test_svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t da
    return SVE_ACLE_FUNC(svst1h_vnum,_s32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1h_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> [[TMP2]], <vscale x 2 x i16>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t data)
@@ -93,14 +85,12 @@ void test_svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t da
    return SVE_ACLE_FUNC(svst1h_vnum,_s64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1h_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <vscale x 4 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> [[TMP2]], <vscale x 4 x i16>* [[TMP4]], i32 1, <vscale x 4 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0(<vscale x 4 x i16> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t data)
@@ -108,14 +98,12 @@ void test_svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t
    return SVE_ACLE_FUNC(svst1h_vnum,_u32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1h_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <vscale x 2 x i16>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> [[TMP2]], <vscale x 2 x i16>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_vnum_u64(svbool_t pg, uint16_t *base, int64_t vnum, svuint64_t data)
@@ -123,7 +111,7 @@ void test_svst1h_vnum_u64(svbool_t pg, uint16_t *base, int64_t vnum, svuint64_t
    return SVE_ACLE_FUNC(svst1h_vnum,_u64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32base_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32base_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -135,7 +123,7 @@ void test_svst1h_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t da
    return SVE_ACLE_FUNC(svst1h_scatter,_u32base,,_s32)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -147,7 +135,7 @@ void test_svst1h_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t da
    return SVE_ACLE_FUNC(svst1h_scatter,_u64base,,_s64)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32base_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32base_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -159,7 +147,7 @@ void test_svst1h_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t d
    return SVE_ACLE_FUNC(svst1h_scatter,_u32base,,_u32)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -171,11 +159,11 @@ void test_svst1h_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t d
    return SVE_ACLE_FUNC(svst1h_scatter,_u64base,,_u64)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_s32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_s32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_s32offset_s32(svbool_t pg, int16_t *base, svint32_t offsets, svint32_t data)
@@ -183,11 +171,11 @@ void test_svst1h_scatter_s32offset_s32(svbool_t pg, int16_t *base, svint32_t off
    return SVE_ACLE_FUNC(svst1h_scatter_,s32,offset,_s32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_s64offset_s64(svbool_t pg, int16_t *base, svint64_t offsets, svint64_t data)
@@ -195,11 +183,11 @@ void test_svst1h_scatter_s64offset_s64(svbool_t pg, int16_t *base, svint64_t off
    return SVE_ACLE_FUNC(svst1h_scatter_,s64,offset,_s64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_s32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_s32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_s32offset_u32(svbool_t pg, uint16_t *base, svint32_t offsets, svuint32_t data)
@@ -207,11 +195,11 @@ void test_svst1h_scatter_s32offset_u32(svbool_t pg, uint16_t *base, svint32_t of
    return SVE_ACLE_FUNC(svst1h_scatter_,s32,offset,_u32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_s64offset_u64(svbool_t pg, uint16_t *base, svint64_t offsets, svuint64_t data)
@@ -219,11 +207,11 @@ void test_svst1h_scatter_s64offset_u64(svbool_t pg, uint16_t *base, svint64_t of
    return SVE_ACLE_FUNC(svst1h_scatter_,s64,offset,_u64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_u32offset_s32(svbool_t pg, int16_t *base, svuint32_t offsets, svint32_t data)
@@ -231,11 +219,11 @@ void test_svst1h_scatter_u32offset_s32(svbool_t pg, int16_t *base, svuint32_t of
    return SVE_ACLE_FUNC(svst1h_scatter_,u32,offset,_s32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_u64offset_s64(svbool_t pg, int16_t *base, svuint64_t offsets, svint64_t data)
@@ -243,11 +231,11 @@ void test_svst1h_scatter_u64offset_s64(svbool_t pg, int16_t *base, svuint64_t of
    return SVE_ACLE_FUNC(svst1h_scatter_,u64,offset,_s64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_u32offset_u32(svbool_t pg, uint16_t *base, svuint32_t offsets, svuint32_t data)
@@ -255,11 +243,11 @@ void test_svst1h_scatter_u32offset_u32(svbool_t pg, uint16_t *base, svuint32_t o
    return SVE_ACLE_FUNC(svst1h_scatter_,u32,offset,_u32)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_u64offset_u64(svbool_t pg, uint16_t *base, svuint64_t offsets, svuint64_t data)
@@ -267,7 +255,7 @@ void test_svst1h_scatter_u64offset_u64(svbool_t pg, uint16_t *base, svuint64_t o
    return SVE_ACLE_FUNC(svst1h_scatter_,u64,offset,_u64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32base_offset_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32base_offset_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -279,7 +267,7 @@ void test_svst1h_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64
    return SVE_ACLE_FUNC(svst1h_scatter,_u32base,_offset,_s32)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -291,7 +279,7 @@ void test_svst1h_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64
    return SVE_ACLE_FUNC(svst1h_scatter,_u64base,_offset,_s64)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32base_offset_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32base_offset_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -303,7 +291,7 @@ void test_svst1h_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64
    return SVE_ACLE_FUNC(svst1h_scatter,_u32base,_offset,_u32)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -315,11 +303,11 @@ void test_svst1h_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64
    return SVE_ACLE_FUNC(svst1h_scatter,_u64base,_offset,_u64)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_s32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_s32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_s32index_s32(svbool_t pg, int16_t *base, svint32_t indices, svint32_t data)
@@ -327,11 +315,11 @@ void test_svst1h_scatter_s32index_s32(svbool_t pg, int16_t *base, svint32_t indi
    return SVE_ACLE_FUNC(svst1h_scatter_,s32,index,_s32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_s64index_s64(svbool_t pg, int16_t *base, svint64_t indices, svint64_t data)
@@ -339,11 +327,11 @@ void test_svst1h_scatter_s64index_s64(svbool_t pg, int16_t *base, svint64_t indi
    return SVE_ACLE_FUNC(svst1h_scatter_,s64,index,_s64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_s32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_s32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_s32index_u32(svbool_t pg, uint16_t *base, svint32_t indices, svuint32_t data)
@@ -351,11 +339,11 @@ void test_svst1h_scatter_s32index_u32(svbool_t pg, uint16_t *base, svint32_t ind
    return SVE_ACLE_FUNC(svst1h_scatter_,s32,index,_u32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_s64index_u64(svbool_t pg, uint16_t *base, svint64_t indices, svuint64_t data)
@@ -363,11 +351,11 @@ void test_svst1h_scatter_s64index_u64(svbool_t pg, uint16_t *base, svint64_t ind
    return SVE_ACLE_FUNC(svst1h_scatter_,s64,index,_u64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_u32index_s32(svbool_t pg, int16_t *base, svuint32_t indices, svint32_t data)
@@ -375,11 +363,11 @@ void test_svst1h_scatter_u32index_s32(svbool_t pg, int16_t *base, svuint32_t ind
    return SVE_ACLE_FUNC(svst1h_scatter_,u32,index,_s32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_u64index_s64(svbool_t pg, int16_t *base, svuint64_t indices, svint64_t data)
@@ -387,11 +375,11 @@ void test_svst1h_scatter_u64index_s64(svbool_t pg, int16_t *base, svuint64_t ind
    return SVE_ACLE_FUNC(svst1h_scatter_,u64,index,_s64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_u32index_u32(svbool_t pg, uint16_t *base, svuint32_t indices, svuint32_t data)
@@ -399,11 +387,11 @@ void test_svst1h_scatter_u32index_u32(svbool_t pg, uint16_t *base, svuint32_t in
    return SVE_ACLE_FUNC(svst1h_scatter_,u32,index,_u32)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i16* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1h_scatter_u64index_u64(svbool_t pg, uint16_t *base, svuint64_t indices, svuint64_t data)
@@ -411,7 +399,7 @@ void test_svst1h_scatter_u64index_u64(svbool_t pg, uint16_t *base, svuint64_t in
    return SVE_ACLE_FUNC(svst1h_scatter_,u64,index,_u64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32base_index_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32base_index_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -424,7 +412,7 @@ void test_svst1h_scatter_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_
    return SVE_ACLE_FUNC(svst1h_scatter,_u32base,_index,_s32)(pg, bases, index, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -437,7 +425,7 @@ void test_svst1h_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_
    return SVE_ACLE_FUNC(svst1h_scatter,_u64base,_index,_s64)(pg, bases, index, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u32base_index_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u32base_index_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -450,7 +438,7 @@ void test_svst1h_scatter_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_
    return SVE_ACLE_FUNC(svst1h_scatter,_u32base,_index,_u32)(pg, bases, index, data);
  }
  
-// CHECK-LABEL: @test_svst1h_scatter_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1h_scatter_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c

index 4c87fde..b2f6961 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -12,12 +12,11 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst1w_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> [[TMP1]], <vscale x 2 x i32>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_s64(svbool_t pg, int32_t *base, svint64_t data)
@@ -25,12 +24,11 @@ void test_svst1w_s64(svbool_t pg, int32_t *base, svint64_t data)
    return SVE_ACLE_FUNC(svst1w,_s64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1w_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> [[TMP1]], <vscale x 2 x i32>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data)
@@ -38,14 +36,12 @@ void test_svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data)
    return SVE_ACLE_FUNC(svst1w,_u64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst1w_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> [[TMP2]], <vscale x 2 x i32>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t data)
@@ -53,14 +49,12 @@ void test_svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t da
    return SVE_ACLE_FUNC(svst1w_vnum,_s64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1w_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
  // CHECK-NEXT:    [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <vscale x 2 x i32>*
-// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> [[TMP2]], <vscale x 2 x i32>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP2]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_vnum_u64(svbool_t pg, uint32_t *base, int64_t vnum, svuint64_t data)
@@ -68,7 +62,7 @@ void test_svst1w_vnum_u64(svbool_t pg, uint32_t *base, int64_t vnum, svuint64_t
    return SVE_ACLE_FUNC(svst1w_vnum,_u64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64base_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64base_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -80,7 +74,7 @@ void test_svst1w_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t da
    return SVE_ACLE_FUNC(svst1w_scatter,_u64base,,_s64)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64base_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64base_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -92,11 +86,11 @@ void test_svst1w_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t d
    return SVE_ACLE_FUNC(svst1w_scatter,_u64base,,_u64)(pg, bases, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_s64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_s64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_scatter_s64offset_s64(svbool_t pg, int32_t *base, svint64_t offsets, svint64_t data)
@@ -104,11 +98,11 @@ void test_svst1w_scatter_s64offset_s64(svbool_t pg, int32_t *base, svint64_t off
    return SVE_ACLE_FUNC(svst1w_scatter_,s64,offset,_s64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_s64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_s64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_scatter_s64offset_u64(svbool_t pg, uint32_t *base, svint64_t offsets, svuint64_t data)
@@ -116,11 +110,11 @@ void test_svst1w_scatter_s64offset_u64(svbool_t pg, uint32_t *base, svint64_t of
    return SVE_ACLE_FUNC(svst1w_scatter_,s64,offset,_u64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_scatter_u64offset_s64(svbool_t pg, int32_t *base, svuint64_t offsets, svint64_t data)
@@ -128,11 +122,11 @@ void test_svst1w_scatter_u64offset_s64(svbool_t pg, int32_t *base, svuint64_t of
    return SVE_ACLE_FUNC(svst1w_scatter_,u64,offset,_s64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_scatter_u64offset_u64(svbool_t pg, uint32_t *base, svuint64_t offsets, svuint64_t data)
@@ -140,7 +134,7 @@ void test_svst1w_scatter_u64offset_u64(svbool_t pg, uint32_t *base, svuint64_t o
    return SVE_ACLE_FUNC(svst1w_scatter_,u64,offset,_u64)(pg, base, offsets, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64base_offset_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64base_offset_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -152,7 +146,7 @@ void test_svst1w_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64
    return SVE_ACLE_FUNC(svst1w_scatter,_u64base,_offset,_s64)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64base_offset_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64base_offset_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -164,11 +158,11 @@ void test_svst1w_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64
    return SVE_ACLE_FUNC(svst1w_scatter,_u64base,_offset,_u64)(pg, bases, offset, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_s64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_s64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_scatter_s64index_s64(svbool_t pg, int32_t *base, svint64_t indices, svint64_t data)
@@ -176,11 +170,11 @@ void test_svst1w_scatter_s64index_s64(svbool_t pg, int32_t *base, svint64_t indi
    return SVE_ACLE_FUNC(svst1w_scatter_,s64,index,_s64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_s64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_s64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_scatter_s64index_u64(svbool_t pg, uint32_t *base, svint64_t indices, svuint64_t data)
@@ -188,11 +182,11 @@ void test_svst1w_scatter_s64index_u64(svbool_t pg, uint32_t *base, svint64_t ind
    return SVE_ACLE_FUNC(svst1w_scatter_,s64,index,_u64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_scatter_u64index_s64(svbool_t pg, int32_t *base, svuint64_t indices, svint64_t data)
@@ -200,11 +194,11 @@ void test_svst1w_scatter_u64index_s64(svbool_t pg, int32_t *base, svuint64_t ind
    return SVE_ACLE_FUNC(svst1w_scatter_,u64,index,_s64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], i32* [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
  // CHECK-NEXT:    ret void
  //
  void test_svst1w_scatter_u64index_u64(svbool_t pg, uint32_t *base, svuint64_t indices, svuint64_t data)
@@ -212,7 +206,7 @@ void test_svst1w_scatter_u64index_u64(svbool_t pg, uint32_t *base, svuint64_t in
    return SVE_ACLE_FUNC(svst1w_scatter_,u64,index,_u64)(pg, base, indices, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64base_index_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64base_index_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -225,7 +219,7 @@ void test_svst1w_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_
    return SVE_ACLE_FUNC(svst1w_scatter,_u64base,_index,_s64)(pg, bases, index, data);
  }
  
-// CHECK-LABEL: @test_svst1w_scatter_u64base_index_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst1w_scatter_u64base_index_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c

index 1f54f5f..7b7cf3a 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -14,20 +14,20 @@
  #else
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
-// CHECK-LABEL: @test_svst2_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z15test_svst2_bf16u10__SVBool_tPu6__bf1614svbfloat16x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svst2_bf16u10__SVBool_tPu6__bf1614svbfloat16x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data)
@@ -35,24 +35,22 @@ void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data)
    return SVE_ACLE_FUNC(svst2,_bf16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z20test_svst2_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svst2_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data)
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c

index c9f4fbd..ff7ae76 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,18 +14,18 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst2_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_s8(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svst2_s8u10__SVBool_tPa10svint8x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svst2_s8u10__SVBool_tPa10svint8x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data)
@@ -33,20 +33,20 @@ void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data)
    return SVE_ACLE_FUNC(svst2,_s8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst2_s16u10__SVBool_tPs11svint16x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_s16u10__SVBool_tPs11svint16x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data)
@@ -54,20 +54,20 @@ void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data)
    return SVE_ACLE_FUNC(svst2,_s16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst2_s32u10__SVBool_tPi11svint32x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_s32u10__SVBool_tPi11svint32x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data)
@@ -75,20 +75,20 @@ void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data)
    return SVE_ACLE_FUNC(svst2,_s32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst2_s64u10__SVBool_tPl11svint64x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_s64u10__SVBool_tPl11svint64x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data)
@@ -96,18 +96,18 @@ void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data)
    return SVE_ACLE_FUNC(svst2,_s64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_u8(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svst2_u8u10__SVBool_tPh11svuint8x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svst2_u8u10__SVBool_tPh11svuint8x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data)
@@ -115,20 +115,20 @@ void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data)
    return SVE_ACLE_FUNC(svst2,_u8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst2_u16u10__SVBool_tPt12svuint16x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_u16u10__SVBool_tPt12svuint16x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data)
@@ -136,20 +136,20 @@ void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data)
    return SVE_ACLE_FUNC(svst2,_u16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst2_u32u10__SVBool_tPj12svuint32x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_u32u10__SVBool_tPj12svuint32x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data)
@@ -157,20 +157,20 @@ void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data)
    return SVE_ACLE_FUNC(svst2,_u32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst2_u64u10__SVBool_tPm12svuint64x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_u64u10__SVBool_tPm12svuint64x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data)
@@ -178,20 +178,20 @@ void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data)
    return SVE_ACLE_FUNC(svst2,_u64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst2_f16u10__SVBool_tPDh13svfloat16x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_f16u10__SVBool_tPDh13svfloat16x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data)
@@ -199,20 +199,20 @@ void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data)
    return SVE_ACLE_FUNC(svst2,_f16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst2_f32u10__SVBool_tPf13svfloat32x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_f32u10__SVBool_tPf13svfloat32x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data)
@@ -220,20 +220,20 @@ void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data)
    return SVE_ACLE_FUNC(svst2,_f32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA]], i64 2)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst2_f64u10__SVBool_tPd13svfloat64x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_f64u10__SVBool_tPd13svfloat64x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data)
@@ -241,22 +241,20 @@ void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data)
    return SVE_ACLE_FUNC(svst2,_f64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z18test_svst2_vnum_s8u10__SVBool_tPal10svint8x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst2_vnum_s8u10__SVBool_tPal10svint8x2_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data)
@@ -264,24 +262,22 @@ void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data
    return SVE_ACLE_FUNC(svst2_vnum,_s8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_s16u10__SVBool_tPsl11svint16x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_s16u10__SVBool_tPsl11svint16x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t data)
@@ -289,24 +285,22 @@ void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t d
    return SVE_ACLE_FUNC(svst2_vnum,_s16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_s32u10__SVBool_tPil11svint32x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_s32u10__SVBool_tPil11svint32x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t data)
@@ -314,24 +308,22 @@ void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t d
    return SVE_ACLE_FUNC(svst2_vnum,_s32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_s64u10__SVBool_tPll11svint64x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_s64u10__SVBool_tPll11svint64x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t data)
@@ -339,22 +331,20 @@ void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t d
    return SVE_ACLE_FUNC(svst2_vnum,_s64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z18test_svst2_vnum_u8u10__SVBool_tPhl11svuint8x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst2_vnum_u8u10__SVBool_tPhl11svuint8x2_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t data)
@@ -362,24 +352,22 @@ void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t da
    return SVE_ACLE_FUNC(svst2_vnum,_u8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_u16u10__SVBool_tPtl12svuint16x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_u16u10__SVBool_tPtl12svuint16x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t data)
@@ -387,24 +375,22 @@ void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t
    return SVE_ACLE_FUNC(svst2_vnum,_u16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_u32u10__SVBool_tPjl12svuint32x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_u32u10__SVBool_tPjl12svuint32x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t data)
@@ -412,24 +398,22 @@ void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t
    return SVE_ACLE_FUNC(svst2_vnum,_u32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_u64u10__SVBool_tPml12svuint64x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_u64u10__SVBool_tPml12svuint64x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[DATA]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t data)
@@ -437,24 +421,22 @@ void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t
    return SVE_ACLE_FUNC(svst2_vnum,_u64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_f16u10__SVBool_tPDhl13svfloat16x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_f16u10__SVBool_tPDhl13svfloat16x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2_t data)
@@ -462,24 +444,22 @@ void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2
    return SVE_ACLE_FUNC(svst2_vnum,_f16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_f32u10__SVBool_tPfl13svfloat32x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_f32u10__SVBool_tPfl13svfloat32x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2_t data)
@@ -487,24 +467,22 @@ void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2
    return SVE_ACLE_FUNC(svst2_vnum,_f32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst2_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA]], i64 2)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst2_vnum_f64u10__SVBool_tPdl13svfloat64x2_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_f64u10__SVBool_tPdl13svfloat64x2_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[DATA]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2_t data)
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c

index a2879ca..356ad3e 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,22 +15,22 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst3_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z15test_svst3_bf16u10__SVBool_tPu6__bf1614svbfloat16x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svst3_bf16u10__SVBool_tPu6__bf1614svbfloat16x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data)
@@ -38,26 +38,24 @@ void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data)
    return SVE_ACLE_FUNC(svst3,_bf16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z20test_svst3_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svst3_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x3_t data)
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c

index 214953a..a5c072c 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,20 +14,20 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst3_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_s8(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svst3_s8u10__SVBool_tPa10svint8x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svst3_s8u10__SVBool_tPa10svint8x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data)
@@ -35,22 +35,22 @@ void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data)
    return SVE_ACLE_FUNC(svst3,_s8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst3_s16u10__SVBool_tPs11svint16x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_s16u10__SVBool_tPs11svint16x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data)
@@ -58,22 +58,22 @@ void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data)
    return SVE_ACLE_FUNC(svst3,_s16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst3_s32u10__SVBool_tPi11svint32x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_s32u10__SVBool_tPi11svint32x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data)
@@ -81,22 +81,22 @@ void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data)
    return SVE_ACLE_FUNC(svst3,_s32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst3_s64u10__SVBool_tPl11svint64x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_s64u10__SVBool_tPl11svint64x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data)
@@ -104,20 +104,20 @@ void test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data)
    return SVE_ACLE_FUNC(svst3,_s64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_u8(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svst3_u8u10__SVBool_tPh11svuint8x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svst3_u8u10__SVBool_tPh11svuint8x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data)
@@ -125,22 +125,22 @@ void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data)
    return SVE_ACLE_FUNC(svst3,_u8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst3_u16u10__SVBool_tPt12svuint16x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_u16u10__SVBool_tPt12svuint16x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data)
@@ -148,22 +148,22 @@ void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data)
    return SVE_ACLE_FUNC(svst3,_u16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst3_u32u10__SVBool_tPj12svuint32x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_u32u10__SVBool_tPj12svuint32x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data)
@@ -171,22 +171,22 @@ void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data)
    return SVE_ACLE_FUNC(svst3,_u32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst3_u64u10__SVBool_tPm12svuint64x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_u64u10__SVBool_tPm12svuint64x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data)
@@ -194,22 +194,22 @@ void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data)
    return SVE_ACLE_FUNC(svst3,_u64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst3_f16u10__SVBool_tPDh13svfloat16x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_f16u10__SVBool_tPDh13svfloat16x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data)
@@ -217,22 +217,22 @@ void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data)
    return SVE_ACLE_FUNC(svst3,_f16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 4)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst3_f32u10__SVBool_tPf13svfloat32x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_f32u10__SVBool_tPf13svfloat32x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 4)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data)
@@ -240,22 +240,22 @@ void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data)
    return SVE_ACLE_FUNC(svst3,_f32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 2)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst3_f64u10__SVBool_tPd13svfloat64x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_f64u10__SVBool_tPd13svfloat64x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 2)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data)
@@ -263,24 +263,22 @@ void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data)
    return SVE_ACLE_FUNC(svst3,_f64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z18test_svst3_vnum_s8u10__SVBool_tPal10svint8x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst3_vnum_s8u10__SVBool_tPal10svint8x3_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data)
@@ -288,26 +286,24 @@ void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data
    return SVE_ACLE_FUNC(svst3_vnum,_s8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_s16u10__SVBool_tPsl11svint16x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_s16u10__SVBool_tPsl11svint16x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t data)
@@ -315,26 +311,24 @@ void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t d
    return SVE_ACLE_FUNC(svst3_vnum,_s16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_s32u10__SVBool_tPil11svint32x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_s32u10__SVBool_tPil11svint32x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t data)
@@ -342,26 +336,24 @@ void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t d
    return SVE_ACLE_FUNC(svst3_vnum,_s32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_s64u10__SVBool_tPll11svint64x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_s64u10__SVBool_tPll11svint64x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t data)
@@ -369,24 +361,22 @@ void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t d
    return SVE_ACLE_FUNC(svst3_vnum,_s64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z18test_svst3_vnum_u8u10__SVBool_tPhl11svuint8x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst3_vnum_u8u10__SVBool_tPhl11svuint8x3_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t data)
@@ -394,26 +384,24 @@ void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t da
    return SVE_ACLE_FUNC(svst3_vnum,_u8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_u16u10__SVBool_tPtl12svuint16x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_u16u10__SVBool_tPtl12svuint16x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv24i16(<vscale x 24 x i16> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t data)
@@ -421,26 +409,24 @@ void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t
    return SVE_ACLE_FUNC(svst3_vnum,_u16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_u32u10__SVBool_tPjl12svuint32x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_u32u10__SVBool_tPjl12svuint32x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t data)
@@ -448,26 +434,24 @@ void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t
    return SVE_ACLE_FUNC(svst3_vnum,_u32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_u64u10__SVBool_tPml12svuint64x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_u64u10__SVBool_tPml12svuint64x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv6i64(<vscale x 6 x i64> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t data)
@@ -475,26 +459,24 @@ void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t
    return SVE_ACLE_FUNC(svst3_vnum,_u64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_f16u10__SVBool_tPDhl13svfloat16x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_f16u10__SVBool_tPDhl13svfloat16x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv24f16(<vscale x 24 x half> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3_t data)
@@ -502,26 +484,24 @@ void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3
    return SVE_ACLE_FUNC(svst3_vnum,_f16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_f32u10__SVBool_tPfl13svfloat32x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_f32u10__SVBool_tPfl13svfloat32x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv12f32(<vscale x 12 x float> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3_t data)
@@ -529,26 +509,24 @@ void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3
    return SVE_ACLE_FUNC(svst3_vnum,_f32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst3_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_f64u10__SVBool_tPdl13svfloat64x3_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_f64u10__SVBool_tPdl13svfloat64x3_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3_t data)
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c

index 60508fa..b580e3a 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,24 +15,24 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst4_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z15test_svst4_bf16u10__SVBool_tPu6__bf1614svbfloat16x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svst4_bf16u10__SVBool_tPu6__bf1614svbfloat16x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data)
@@ -40,28 +40,26 @@ void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data)
    return SVE_ACLE_FUNC(svst4,_bf16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z20test_svst4_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svst4_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x4_t data)
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c

index c2be512..0b4311a 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
  #include <arm_sve.h>
  
  #ifdef SVE_OVERLOADED_FORMS
@@ -14,22 +14,22 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svst4_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_s8(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svst4_s8u10__SVBool_tPa10svint8x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svst4_s8u10__SVBool_tPa10svint8x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data)
@@ -37,24 +37,24 @@ void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data)
    return SVE_ACLE_FUNC(svst4,_s8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst4_s16u10__SVBool_tPs11svint16x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_s16u10__SVBool_tPs11svint16x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data)
@@ -62,24 +62,24 @@ void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data)
    return SVE_ACLE_FUNC(svst4,_s16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst4_s32u10__SVBool_tPi11svint32x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_s32u10__SVBool_tPi11svint32x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data)
@@ -87,24 +87,24 @@ void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data)
    return SVE_ACLE_FUNC(svst4,_s32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst4_s64u10__SVBool_tPl11svint64x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_s64u10__SVBool_tPl11svint64x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data)
@@ -112,22 +112,22 @@ void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data)
    return SVE_ACLE_FUNC(svst4,_s64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_u8(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z13test_svst4_u8u10__SVBool_tPh11svuint8x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z13test_svst4_u8u10__SVBool_tPh11svuint8x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data)
@@ -135,24 +135,24 @@ void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data)
    return SVE_ACLE_FUNC(svst4,_u8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst4_u16u10__SVBool_tPt12svuint16x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_u16u10__SVBool_tPt12svuint16x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data)
@@ -160,24 +160,24 @@ void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data)
    return SVE_ACLE_FUNC(svst4,_u16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst4_u32u10__SVBool_tPj12svuint32x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_u32u10__SVBool_tPj12svuint32x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data)
@@ -185,24 +185,24 @@ void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data)
    return SVE_ACLE_FUNC(svst4,_u32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst4_u64u10__SVBool_tPm12svuint64x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_u64u10__SVBool_tPm12svuint64x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data)
@@ -210,24 +210,24 @@ void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data)
    return SVE_ACLE_FUNC(svst4,_u64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 16)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst4_f16u10__SVBool_tPDh13svfloat16x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_f16u10__SVBool_tPDh13svfloat16x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 16)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data)
@@ -235,24 +235,24 @@ void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data)
    return SVE_ACLE_FUNC(svst4,_f16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 4)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 8)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 12)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst4_f32u10__SVBool_tPf13svfloat32x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_f32u10__SVBool_tPf13svfloat32x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 4)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 8)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 12)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data)
@@ -260,24 +260,24 @@ void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data)
    return SVE_ACLE_FUNC(svst4,_f32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA:%.*]], i64 0)
  // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 2)
  // CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 4)
  // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 6)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z14test_svst4_f64u10__SVBool_tPd13svfloat64x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_f64u10__SVBool_tPd13svfloat64x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
  // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA:%.*]], i64 0)
  // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 2)
  // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 4)
  // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 6)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data)
@@ -285,26 +285,24 @@ void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data)
    return SVE_ACLE_FUNC(svst4,_f64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z18test_svst4_vnum_s8u10__SVBool_tPal10svint8x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst4_vnum_s8u10__SVBool_tPal10svint8x4_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data)
@@ -312,28 +310,26 @@ void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data
    return SVE_ACLE_FUNC(svst4_vnum,_s8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_s16u10__SVBool_tPsl11svint16x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_s16u10__SVBool_tPsl11svint16x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t data)
@@ -341,28 +337,26 @@ void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t d
    return SVE_ACLE_FUNC(svst4_vnum,_s16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_s32u10__SVBool_tPil11svint32x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_s32u10__SVBool_tPil11svint32x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t data)
@@ -370,28 +364,26 @@ void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t d
    return SVE_ACLE_FUNC(svst4_vnum,_s32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_s64u10__SVBool_tPll11svint64x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_s64u10__SVBool_tPll11svint64x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t data)
@@ -399,26 +391,24 @@ void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t d
    return SVE_ACLE_FUNC(svst4_vnum,_s64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z18test_svst4_vnum_u8u10__SVBool_tPhl11svuint8x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst4_vnum_u8u10__SVBool_tPhl11svuint8x4_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t data)
@@ -426,28 +416,26 @@ void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t da
    return SVE_ACLE_FUNC(svst4_vnum,_u8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_u16u10__SVBool_tPtl12svuint16x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_u16u10__SVBool_tPtl12svuint16x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t data)
@@ -455,28 +443,26 @@ void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t
    return SVE_ACLE_FUNC(svst4_vnum,_u16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_u32u10__SVBool_tPjl12svuint32x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_u32u10__SVBool_tPjl12svuint32x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[DATA]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t data)
@@ -484,28 +470,26 @@ void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t
    return SVE_ACLE_FUNC(svst4_vnum,_u32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_u64u10__SVBool_tPml12svuint64x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_u64u10__SVBool_tPml12svuint64x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[DATA]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t data)
@@ -513,28 +497,26 @@ void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t
    return SVE_ACLE_FUNC(svst4_vnum,_u64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 16)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_f16u10__SVBool_tPDhl13svfloat16x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_f16u10__SVBool_tPDhl13svfloat16x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[DATA]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4_t data)
@@ -542,28 +524,26 @@ void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4
    return SVE_ACLE_FUNC(svst4_vnum,_f16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 8)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 12)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_f32u10__SVBool_tPfl13svfloat32x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_f32u10__SVBool_tPfl13svfloat32x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 12)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[DATA]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4_t data)
@@ -571,28 +551,26 @@ void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4
    return SVE_ACLE_FUNC(svst4_vnum,_f32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svst4_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 6)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_f64u10__SVBool_tPdl13svfloat64x4_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_f64u10__SVBool_tPdl13svfloat64x4_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 6)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4_t data)
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c

index 3a749be..34d7ce8 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c
@@ -1,8 +1,8 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
  
  // REQUIRES: aarch64-registered-target
  
@@ -15,16 +15,16 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svstnt1_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z17test_svstnt1_bf16u10__SVBool_tPu6__bf16u14__SVBFloat16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z17test_svstnt1_bf16u10__SVBool_tPu6__bf16u14__SVBFloat16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
@@ -32,20 +32,18 @@ void test_svstnt1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
    return SVE_ACLE_FUNC(svstnt1,_bf16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_bf16(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_bf16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z22test_svstnt1_vnum_bf16u10__SVBool_tPu6__bf16lu14__SVBFloat16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z22test_svstnt1_vnum_bf16u10__SVBool_tPu6__bf16lu14__SVBFloat16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data)
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c

index 420fc0e..f9f9e40 100644 (file)
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c
@@ -1,10 +1,10 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
  // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
  
  #include <arm_sve.h>
  
@@ -15,14 +15,14 @@
  #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
  #endif
  
-// CHECK-LABEL: @test_svstnt1_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z15test_svstnt1_s8u10__SVBool_tPau10__SVInt8_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svstnt1_s8u10__SVBool_tPau10__SVInt8_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_s8(svbool_t pg, int8_t *base, svint8_t data)
@@ -30,16 +30,16 @@ void test_svstnt1_s8(svbool_t pg, int8_t *base, svint8_t data)
    return SVE_ACLE_FUNC(svstnt1,_s8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svstnt1_s16u10__SVBool_tPsu11__SVInt16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svstnt1_s16u10__SVBool_tPsu11__SVInt16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_s16(svbool_t pg, int16_t *base, svint16_t data)
@@ -47,16 +47,16 @@ void test_svstnt1_s16(svbool_t pg, int16_t *base, svint16_t data)
    return SVE_ACLE_FUNC(svstnt1,_s16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svstnt1_s32u10__SVBool_tPiu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svstnt1_s32u10__SVBool_tPiu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_s32(svbool_t pg, int32_t *base, svint32_t data)
@@ -64,16 +64,16 @@ void test_svstnt1_s32(svbool_t pg, int32_t *base, svint32_t data)
    return SVE_ACLE_FUNC(svstnt1,_s32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svstnt1_s64u10__SVBool_tPlu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svstnt1_s64u10__SVBool_tPlu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_s64(svbool_t pg, int64_t *base, svint64_t data)
@@ -81,14 +81,14 @@ void test_svstnt1_s64(svbool_t pg, int64_t *base, svint64_t data)
    return SVE_ACLE_FUNC(svstnt1,_s64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z15test_svstnt1_u8u10__SVBool_tPhu11__SVUint8_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svstnt1_u8u10__SVBool_tPhu11__SVUint8_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
@@ -96,16 +96,16 @@ void test_svstnt1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
    return SVE_ACLE_FUNC(svstnt1,_u8,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svstnt1_u16u10__SVBool_tPtu12__SVUint16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svstnt1_u16u10__SVBool_tPtu12__SVUint16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
@@ -113,16 +113,16 @@ void test_svstnt1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
    return SVE_ACLE_FUNC(svstnt1,_u16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svstnt1_u32u10__SVBool_tPju12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svstnt1_u32u10__SVBool_tPju12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
@@ -130,16 +130,16 @@ void test_svstnt1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
    return SVE_ACLE_FUNC(svstnt1,_u32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svstnt1_u64u10__SVBool_tPmu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svstnt1_u64u10__SVBool_tPmu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
@@ -147,16 +147,16 @@ void test_svstnt1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
    return SVE_ACLE_FUNC(svstnt1,_u64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svstnt1_f16u10__SVBool_tPDhu13__SVFloat16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svstnt1_f16u10__SVBool_tPDhu13__SVFloat16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
@@ -164,16 +164,16 @@ void test_svstnt1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
    return SVE_ACLE_FUNC(svstnt1,_f16,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svstnt1_f32u10__SVBool_tPfu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svstnt1_f32u10__SVBool_tPfu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
@@ -181,16 +181,16 @@ void test_svstnt1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
    return SVE_ACLE_FUNC(svstnt1,_f32,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z16test_svstnt1_f64u10__SVBool_tPdu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z16test_svstnt1_f64u10__SVBool_tPdu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
@@ -198,18 +198,16 @@ void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
    return SVE_ACLE_FUNC(svstnt1,_f64,,)(pg, base, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_s8(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_s8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z20test_svstnt1_vnum_s8u10__SVBool_tPalu10__SVInt8_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svstnt1_vnum_s8u10__SVBool_tPalu10__SVInt8_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
@@ -217,20 +215,18 @@ void test_svstnt1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data
    return SVE_ACLE_FUNC(svstnt1_vnum,_s8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_s16(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_s16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_s16u10__SVBool_tPslu11__SVInt16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svstnt1_vnum_s16u10__SVBool_tPslu11__SVInt16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data)
@@ -238,20 +234,18 @@ void test_svstnt1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t d
    return SVE_ACLE_FUNC(svstnt1_vnum,_s16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_s32(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_s32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_s32u10__SVBool_tPilu11__SVInt32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svstnt1_vnum_s32u10__SVBool_tPilu11__SVInt32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data)
@@ -259,20 +253,18 @@ void test_svstnt1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t d
    return SVE_ACLE_FUNC(svstnt1_vnum,_s32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_s64(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_s64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_s64u10__SVBool_tPllu11__SVInt64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svstnt1_vnum_s64u10__SVBool_tPllu11__SVInt64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data)
@@ -280,18 +272,16 @@ void test_svstnt1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t d
    return SVE_ACLE_FUNC(svstnt1_vnum,_s64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_u8(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_u8(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z20test_svstnt1_vnum_u8u10__SVBool_tPhlu11__SVUint8_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svstnt1_vnum_u8u10__SVBool_tPhlu11__SVUint8_t(
  // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data)
@@ -299,20 +289,18 @@ void test_svstnt1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t da
    return SVE_ACLE_FUNC(svstnt1_vnum,_u8,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_u16(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_u16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_u16u10__SVBool_tPtlu12__SVUint16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svstnt1_vnum_u16u10__SVBool_tPtlu12__SVUint16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data)
@@ -320,20 +308,18 @@ void test_svstnt1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t
    return SVE_ACLE_FUNC(svstnt1_vnum,_u16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_u32(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_u32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_u32u10__SVBool_tPjlu12__SVUint32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svstnt1_vnum_u32u10__SVBool_tPjlu12__SVUint32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data)
@@ -341,20 +327,18 @@ void test_svstnt1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t
    return SVE_ACLE_FUNC(svstnt1_vnum,_u32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_u64(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_u64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_u64u10__SVBool_tPmlu12__SVUint64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svstnt1_vnum_u64u10__SVBool_tPmlu12__SVUint64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data)
@@ -362,20 +346,18 @@ void test_svstnt1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t
    return SVE_ACLE_FUNC(svstnt1_vnum,_u64,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_f16(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_f16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_f16u10__SVBool_tPDhlu13__SVFloat16_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svstnt1_vnum_f16u10__SVBool_tPDhlu13__SVFloat16_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data)
@@ -383,20 +365,18 @@ void test_svstnt1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16
    return SVE_ACLE_FUNC(svstnt1_vnum,_f16,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_f32(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_f32(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_f32u10__SVBool_tPflu13__SVFloat32_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svstnt1_vnum_f32u10__SVBool_tPflu13__SVFloat32_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data)
@@ -404,20 +384,18 @@ void test_svstnt1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32
    return SVE_ACLE_FUNC(svstnt1_vnum,_f32,,)(pg, base, vnum, data);
  }
  
-// CHECK-LABEL: @test_svstnt1_vnum_f64(
+// CHECK-LABEL: define {{[^@]+}}@test_svstnt1_vnum_f64(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CHECK-NEXT:    ret void
  //
-// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_f64u10__SVBool_tPdlu13__SVFloat64_t(
+// CPP-CHECK-LABEL: define {{[^@]+}}@_Z21test_svstnt1_vnum_f64u10__SVBool_tPdlu13__SVFloat64_t(
  // CPP-CHECK-NEXT:  entry:
  // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
  // CPP-CHECK-NEXT:    ret void
  //
  void test_svstnt1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data)
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c

index 00d88ee..0d06327 100644 (file)
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
@@ -1,7 +1,7 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
-// RUN: %clang_cc1 -no-opaque-pointers -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
  
  // REQUIRES: aarch64-registered-target
  
@@ -28,24 +28,24 @@ DEFINE_STRUCT(bool)
  // int64
  //===----------------------------------------------------------------------===//
  
-// CHECK-128-LABEL: @read_int64(
+// CHECK-128-LABEL: define {{[^@]+}}@read_int64(
  // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
  // CHECK-128-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP0]], i64 0)
  // CHECK-128-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
  //
-// CHECK-256-LABEL: @read_int64(
+// CHECK-256-LABEL: define {{[^@]+}}@read_int64(
  // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:    [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-256-NEXT:    [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
  // CHECK-256-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP0]], i64 0)
  // CHECK-256-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
  //
-// CHECK-512-LABEL: @read_int64(
+// CHECK-512-LABEL: define {{[^@]+}}@read_int64(
  // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-512-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
  // CHECK-512-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[TMP0]], i64 0)
  // CHECK-512-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
  //
@@ -53,25 +53,25 @@ svint64_t read_int64(struct struct_int64 *s) {
    return s->y[0];
  }
  
-// CHECK-128-LABEL: @write_int64(
+// CHECK-128-LABEL: define {{[^@]+}}@write_int64(
  // CHECK-128-NEXT:  entry:
  // CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
-// CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:    store <2 x i64> [[CASTFIXEDSVE]], <2 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-128-NEXT:    store <2 x i64> [[CASTFIXEDSVE]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-128-NEXT:    ret void
  //
-// CHECK-256-LABEL: @write_int64(
+// CHECK-256-LABEL: define {{[^@]+}}@write_int64(
  // CHECK-256-NEXT:  entry:
  // CHECK-256-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
-// CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:    store <4 x i64> [[CASTFIXEDSVE]], <4 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-256-NEXT:    store <4 x i64> [[CASTFIXEDSVE]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-256-NEXT:    ret void
  //
-// CHECK-512-LABEL: @write_int64(
+// CHECK-512-LABEL: define {{[^@]+}}@write_int64(
  // CHECK-512-NEXT:  entry:
  // CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
-// CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:    store <8 x i64> [[CASTFIXEDSVE]], <8 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-512-NEXT:    store <8 x i64> [[CASTFIXEDSVE]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-512-NEXT:    ret void
  //
  void write_int64(struct struct_int64 *s, svint64_t x) {
@@ -82,24 +82,24 @@ void write_int64(struct struct_int64 *s, svint64_t x) {
  // float64
  //===----------------------------------------------------------------------===//
  
-// CHECK-128-LABEL: @read_float64(
+// CHECK-128-LABEL: define {{[^@]+}}@read_float64(
  // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-128-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> [[TMP0]], i64 0)
  // CHECK-128-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
  //
-// CHECK-256-LABEL: @read_float64(
+// CHECK-256-LABEL: define {{[^@]+}}@read_float64(
  // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:    [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-256-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-256-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP0]], i64 0)
  // CHECK-256-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
  //
-// CHECK-512-LABEL: @read_float64(
+// CHECK-512-LABEL: define {{[^@]+}}@read_float64(
  // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x double>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-512-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[TMP0]], i64 0)
  // CHECK-512-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
  //
@@ -107,25 +107,25 @@ svfloat64_t read_float64(struct struct_float64 *s) {
    return s->y[0];
  }
  
-// CHECK-128-LABEL: @write_float64(
+// CHECK-128-LABEL: define {{[^@]+}}@write_float64(
  // CHECK-128-NEXT:  entry:
  // CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
-// CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:    store <2 x double> [[CASTFIXEDSVE]], <2 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-128-NEXT:    store <2 x double> [[CASTFIXEDSVE]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-128-NEXT:    ret void
  //
-// CHECK-256-LABEL: @write_float64(
+// CHECK-256-LABEL: define {{[^@]+}}@write_float64(
  // CHECK-256-NEXT:  entry:
  // CHECK-256-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
-// CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:    store <4 x double> [[CASTFIXEDSVE]], <4 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-256-NEXT:    store <4 x double> [[CASTFIXEDSVE]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-256-NEXT:    ret void
  //
-// CHECK-512-LABEL: @write_float64(
+// CHECK-512-LABEL: define {{[^@]+}}@write_float64(
  // CHECK-512-NEXT:  entry:
  // CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
-// CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:    store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-512-NEXT:    store <8 x double> [[CASTFIXEDSVE]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-512-NEXT:    ret void
  //
  void write_float64(struct struct_float64 *s, svfloat64_t x) {
@@ -136,24 +136,24 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) {
  // bfloat16
  //===----------------------------------------------------------------------===//
  
-// CHECK-128-LABEL: @read_bfloat16(
+// CHECK-128-LABEL: define {{[^@]+}}@read_bfloat16(
  // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:    [[TMP0:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-128-NEXT:    [[TMP0:%.*]] = load <8 x bfloat>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-128-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[TMP0]], i64 0)
  // CHECK-128-NEXT:    ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
  //
-// CHECK-256-LABEL: @read_bfloat16(
+// CHECK-256-LABEL: define {{[^@]+}}@read_bfloat16(
  // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:    [[TMP0:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-256-NEXT:    [[TMP0:%.*]] = load <16 x bfloat>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-256-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> undef, <16 x bfloat> [[TMP0]], i64 0)
  // CHECK-256-NEXT:    ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
  //
-// CHECK-512-LABEL: @read_bfloat16(
+// CHECK-512-LABEL: define {{[^@]+}}@read_bfloat16(
  // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:    [[TMP0:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-512-NEXT:    [[TMP0:%.*]] = load <32 x bfloat>, ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-512-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v32bf16(<vscale x 8 x bfloat> undef, <32 x bfloat> [[TMP0]], i64 0)
  // CHECK-512-NEXT:    ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
  //
@@ -161,25 +161,25 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) {
    return s->y[0];
  }
  
-// CHECK-128-LABEL: @write_bfloat16(
+// CHECK-128-LABEL: define {{[^@]+}}@write_bfloat16(
  // CHECK-128-NEXT:  entry:
  // CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
-// CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:    store <8 x bfloat> [[CASTFIXEDSVE]], <8 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-128-NEXT:    store <8 x bfloat> [[CASTFIXEDSVE]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-128-NEXT:    ret void
  //
-// CHECK-256-LABEL: @write_bfloat16(
+// CHECK-256-LABEL: define {{[^@]+}}@write_bfloat16(
  // CHECK-256-NEXT:  entry:
  // CHECK-256-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
-// CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:    store <16 x bfloat> [[CASTFIXEDSVE]], <16 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-256-NEXT:    store <16 x bfloat> [[CASTFIXEDSVE]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-256-NEXT:    ret void
  //
-// CHECK-512-LABEL: @write_bfloat16(
+// CHECK-512-LABEL: define {{[^@]+}}@write_bfloat16(
  // CHECK-512-NEXT:  entry:
  // CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <32 x bfloat> @llvm.vector.extract.v32bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
-// CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:    store <32 x bfloat> [[CASTFIXEDSVE]], <32 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-512-NEXT:    store <32 x bfloat> [[CASTFIXEDSVE]], ptr [[Y]], align 16, !tbaa [[TBAA2]]
  // CHECK-512-NEXT:    ret void
  //
  void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) {
@@ -190,26 +190,26 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) {
  // bool
  //===----------------------------------------------------------------------===//
  
-// CHECK-128-LABEL: @read_bool(
+// CHECK-128-LABEL: define {{[^@]+}}@read_bool(
  // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-128-NEXT:    [[TMP0:%.*]] = load <2 x i8>, ptr [[Y]], align 2, !tbaa [[TBAA2]]
  // CHECK-128-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> [[TMP0]], i64 0)
  // CHECK-128-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
  // CHECK-128-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
  //
-// CHECK-256-LABEL: @read_bool(
+// CHECK-256-LABEL: define {{[^@]+}}@read_bool(
  // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:    [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-256-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[Y]], align 2, !tbaa [[TBAA2]]
  // CHECK-256-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> [[TMP0]], i64 0)
  // CHECK-256-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
  // CHECK-256-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
  //
-// CHECK-512-LABEL: @read_bool(
+// CHECK-512-LABEL: define {{[^@]+}}@read_bool(
  // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-512-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 2, !tbaa [[TBAA2]]
  // CHECK-512-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP0]], i64 0)
  // CHECK-512-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
  // CHECK-512-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
@@ -218,28 +218,28 @@ svbool_t read_bool(struct struct_bool *s) {
    return s->y[0];
  }
  
-// CHECK-128-LABEL: @write_bool(
+// CHECK-128-LABEL: define {{[^@]+}}@write_bool(
  // CHECK-128-NEXT:  entry:
  // CHECK-128-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[X:%.*]] to <vscale x 2 x i8>
  // CHECK-128-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-128-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:    store <2 x i8> [[CASTFIXEDSVE]], <2 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-128-NEXT:    store <2 x i8> [[CASTFIXEDSVE]], ptr [[Y]], align 2, !tbaa [[TBAA2]]
  // CHECK-128-NEXT:    ret void
  //
-// CHECK-256-LABEL: @write_bool(
+// CHECK-256-LABEL: define {{[^@]+}}@write_bool(
  // CHECK-256-NEXT:  entry:
  // CHECK-256-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[X:%.*]] to <vscale x 2 x i8>
  // CHECK-256-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:    store <4 x i8> [[CASTFIXEDSVE]], <4 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-256-NEXT:    store <4 x i8> [[CASTFIXEDSVE]], ptr [[Y]], align 2, !tbaa [[TBAA2]]
  // CHECK-256-NEXT:    ret void
  //
-// CHECK-512-LABEL: @write_bool(
+// CHECK-512-LABEL: define {{[^@]+}}@write_bool(
  // CHECK-512-NEXT:  entry:
  // CHECK-512-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1> [[X:%.*]] to <vscale x 2 x i8>
  // CHECK-512-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[TMP0]], i64 0)
-// CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-512-NEXT:    store <8 x i8> [[CASTFIXEDSVE]], <8 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], ptr [[S:%.*]], i64 0, i32 1
+// CHECK-512-NEXT:    store <8 x i8> [[CASTFIXEDSVE]], ptr [[Y]], align 2, !tbaa [[TBAA2]]
  // CHECK-512-NEXT:    ret void
  //
  void write_bool(struct struct_bool *s, svbool_t x) {
diff --git a/clang/test/CodeGen/clear_cache.c b/clang/test/CodeGen/clear_cache.c

index 59bd8c3..ca157ee 100644 (file)
--- a/clang/test/CodeGen/clear_cache.c
+++ b/clang/test/CodeGen/clear_cache.c
@@ -1,16 +1,16 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
  
  char buffer[32] = "This is a largely unused buffer";
  
  // __builtin___clear_cache always maps to @llvm.clear_cache, but what
  // each back-end produces is different, and this is tested in LLVM
  
-// CHECK-LABEL: @main(
+// CHECK-LABEL: define {{[^@]+}}@main(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i32 0, i32* [[RETVAL]], align 4
-// CHECK-NEXT:    call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i64 0, i64 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i64 1, i64 0))
+// CHECK-NEXT:    store i32 0, ptr [[RETVAL]], align 4
+// CHECK-NEXT:    call void @llvm.clear_cache(ptr @buffer, ptr getelementptr inbounds (i8, ptr @buffer, i64 32))
  // CHECK-NEXT:    ret i32 0
  //
  int main(void) {
diff --git a/clang/test/CodeGen/complex-strictfp.c b/clang/test/CodeGen/complex-strictfp.c

index af6d0c3..0bffed4 100644 (file)
--- a/clang/test/CodeGen/complex-strictfp.c
+++ b/clang/test/CodeGen/complex-strictfp.c
@@ -1,5 +1,5 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-unknown -ffp-exception-behavior=maytrap -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -ffp-exception-behavior=maytrap -emit-llvm -o - %s | FileCheck %s
  
  
  // Test that the constrained intrinsics are picking up the exception
@@ -15,45 +15,45 @@ _Complex double g1, g2;
  _Complex float cf;
  double D;
  
-// CHECK-LABEL: @test3a(
+// CHECK-LABEL: define {{[^@]+}}@test3a(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @D, align 8
-// CHECK-NEXT:    [[CF_REAL:%.*]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 0), align 4
-// CHECK-NEXT:    [[CF_IMAG:%.*]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 1), align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @D, align 8
+// CHECK-NEXT:    [[CF_REAL:%.*]] = load float, ptr @cf, align 4
+// CHECK-NEXT:    [[CF_IMAG:%.*]] = load float, ptr getelementptr inbounds ({ float, float }, ptr @cf, i32 0, i32 1), align 4
  // CHECK-NEXT:    [[CONV:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CF_REAL]], metadata !"fpexcept.strict") #[[ATTR2:[0-9]+]]
  // CHECK-NEXT:    [[CONV1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CF_IMAG]], metadata !"fpexcept.strict") #[[ATTR2]]
  // CHECK-NEXT:    [[ADD_R:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[CONV]], double [[TMP0]], metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]]
  // CHECK-NEXT:    [[CONV2:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[ADD_R]], metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]]
  // CHECK-NEXT:    [[CONV3:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[CONV1]], metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]]
-// CHECK-NEXT:    store float [[CONV2]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 0), align 4
-// CHECK-NEXT:    store float [[CONV3]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 1), align 4
+// CHECK-NEXT:    store float [[CONV2]], ptr @cf, align 4
+// CHECK-NEXT:    store float [[CONV3]], ptr getelementptr inbounds ({ float, float }, ptr @cf, i32 0, i32 1), align 4
  // CHECK-NEXT:    ret void
  //
  void test3a(void) {
    cf += D;
  }
  
-// CHECK-LABEL: @test3b(
+// CHECK-LABEL: define {{[^@]+}}@test3b(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CF_REAL:%.*]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 0), align 4
-// CHECK-NEXT:    [[CF_IMAG:%.*]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 1), align 4
+// CHECK-NEXT:    [[CF_REAL:%.*]] = load float, ptr @cf, align 4
+// CHECK-NEXT:    [[CF_IMAG:%.*]] = load float, ptr getelementptr inbounds ({ float, float }, ptr @cf, i32 0, i32 1), align 4
  // CHECK-NEXT:    [[CONV:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CF_REAL]], metadata !"fpexcept.strict") #[[ATTR2]]
  // CHECK-NEXT:    [[CONV1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CF_IMAG]], metadata !"fpexcept.strict") #[[ATTR2]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @D, align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @D, align 8
  // CHECK-NEXT:    [[ADD_R:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP0]], double [[CONV]], metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]]
-// CHECK-NEXT:    store double [[ADD_R]], double* @D, align 8
+// CHECK-NEXT:    store double [[ADD_R]], ptr @D, align 8
  // CHECK-NEXT:    ret void
  //
  void test3b(void) {
    D += cf;
  }
  
-// CHECK-LABEL: @test3c(
+// CHECK-LABEL: define {{[^@]+}}@test3c(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[G1_REAL:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 0), align 8
-// CHECK-NEXT:    [[G1_IMAG:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 1), align 8
-// CHECK-NEXT:    [[CF_REAL:%.*]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 0), align 4
-// CHECK-NEXT:    [[CF_IMAG:%.*]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 1), align 4
+// CHECK-NEXT:    [[G1_REAL:%.*]] = load double, ptr @g1, align 8
+// CHECK-NEXT:    [[G1_IMAG:%.*]] = load double, ptr getelementptr inbounds ({ double, double }, ptr @g1, i32 0, i32 1), align 8
+// CHECK-NEXT:    [[CF_REAL:%.*]] = load float, ptr @cf, align 4
+// CHECK-NEXT:    [[CF_IMAG:%.*]] = load float, ptr getelementptr inbounds ({ float, float }, ptr @cf, i32 0, i32 1), align 4
  // CHECK-NEXT:    [[CONV:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CF_REAL]], metadata !"fpexcept.strict") #[[ATTR2]]
  // CHECK-NEXT:    [[CONV1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CF_IMAG]], metadata !"fpexcept.strict") #[[ATTR2]]
  // CHECK-NEXT:    [[CALL:%.*]] = call { double, double } @__divdc3(double noundef [[CONV]], double noundef [[CONV1]], double noundef [[G1_REAL]], double noundef [[G1_IMAG]]) #[[ATTR3:[0-9]+]]
@@ -61,63 +61,63 @@ void test3b(void) {
  // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { double, double } [[CALL]], 1
  // CHECK-NEXT:    [[CONV2:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP0]], metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]]
  // CHECK-NEXT:    [[CONV3:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP1]], metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]]
-// CHECK-NEXT:    store float [[CONV2]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 0), align 4
-// CHECK-NEXT:    store float [[CONV3]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 1), align 4
+// CHECK-NEXT:    store float [[CONV2]], ptr @cf, align 4
+// CHECK-NEXT:    store float [[CONV3]], ptr getelementptr inbounds ({ float, float }, ptr @cf, i32 0, i32 1), align 4
  // CHECK-NEXT:    ret void
  //
  void test3c(void) {
    cf /= g1;
  }
  
-// CHECK-LABEL: @test3d(
+// CHECK-LABEL: define {{[^@]+}}@test3d(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[G1_REAL:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 0), align 8
-// CHECK-NEXT:    [[G1_IMAG:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 1), align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @D, align 8
+// CHECK-NEXT:    [[G1_REAL:%.*]] = load double, ptr @g1, align 8
+// CHECK-NEXT:    [[G1_IMAG:%.*]] = load double, ptr getelementptr inbounds ({ double, double }, ptr @g1, i32 0, i32 1), align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @D, align 8
  // CHECK-NEXT:    [[ADD_R:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[G1_REAL]], double [[TMP0]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
-// CHECK-NEXT:    store double [[ADD_R]], double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 0), align 8
-// CHECK-NEXT:    store double [[G1_IMAG]], double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 1), align 8
+// CHECK-NEXT:    store double [[ADD_R]], ptr @g1, align 8
+// CHECK-NEXT:    store double [[G1_IMAG]], ptr getelementptr inbounds ({ double, double }, ptr @g1, i32 0, i32 1), align 8
  // CHECK-NEXT:    ret void
  //
  void test3d(void) {
    g1 = g1 + D;
  }
  
-// CHECK-LABEL: @test3e(
+// CHECK-LABEL: define {{[^@]+}}@test3e(
  // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @D, align 8
-// CHECK-NEXT:    [[G1_REAL:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 0), align 8
-// CHECK-NEXT:    [[G1_IMAG:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 1), align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @D, align 8
+// CHECK-NEXT:    [[G1_REAL:%.*]] = load double, ptr @g1, align 8
+// CHECK-NEXT:    [[G1_IMAG:%.*]] = load double, ptr getelementptr inbounds ({ double, double }, ptr @g1, i32 0, i32 1), align 8
  // CHECK-NEXT:    [[ADD_R:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP0]], double [[G1_REAL]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
-// CHECK-NEXT:    store double [[ADD_R]], double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 0), align 8
-// CHECK-NEXT:    store double [[G1_IMAG]], double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 1), align 8
+// CHECK-NEXT:    store double [[ADD_R]], ptr @g1, align 8
+// CHECK-NEXT:    store double [[G1_IMAG]], ptr getelementptr inbounds ({ double, double }, ptr @g1, i32 0, i32 1), align 8
  // CHECK-NEXT:    ret void
  //
  void test3e(void) {
    g1 = D + g1;
  }
  
-// CHECK-LABEL: @t1(
+// CHECK-LABEL: define {{[^@]+}}@t1(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[CONV:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double 4.000000e+00, metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]]
-// CHECK-NEXT:    store float [[CONV]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 0), align 4
+// CHECK-NEXT:    store float [[CONV]], ptr @cf, align 4
  // CHECK-NEXT:    ret void
  //
  void t1(void) {
    (__real__ cf) = 4.0;
  }
  
-// CHECK-LABEL: @t2(
+// CHECK-LABEL: define {{[^@]+}}@t2(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[CONV:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double 4.000000e+00, metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]]
-// CHECK-NEXT:    store float [[CONV]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 1), align 4
+// CHECK-NEXT:    store float [[CONV]], ptr getelementptr inbounds ({ float, float }, ptr @cf, i32 0, i32 1), align 4
  // CHECK-NEXT:    ret void
  //
  void t2(void) {
    (__imag__ cf) = 4.0;
  }
  
-// CHECK-LABEL: @t91(
+// CHECK-LABEL: define {{[^@]+}}@t91(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[C:%.*]] = alloca [0 x i8], align 1
  // CHECK-NEXT:    br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -139,7 +139,7 @@ void t91(void) {
    (0 ? 2.0f : (_Complex double) 2.0f);
  }
  
-// CHECK-LABEL: @t92(
+// CHECK-LABEL: define {{[^@]+}}@t92(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[C:%.*]] = alloca [0 x i8], align 1
  // CHECK-NEXT:    br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
diff --git a/clang/test/CodeGenCXX/for-range.cpp b/clang/test/CodeGenCXX/for-range.cpp

index 37eb4a4..10d2720 100644 (file)
--- a/clang/test/CodeGenCXX/for-range.cpp
+++ b/clang/test/CodeGenCXX/for-range.cpp
@@ -1,5 +1,5 @@
  // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm -o - %s | FileCheck %s
  
  struct A {
    A();
@@ -33,35 +33,35 @@ B *end(C&);
  
  extern B array[5];
  
-// CHECK-LABEL: @_Z9for_arrayv(
+// CHECK-LABEL: define {{[^@]+}}@_Z9for_arrayv(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1
-// CHECK-NEXT:    [[__RANGE1:%.*]] = alloca [5 x %struct.B]*, align 8
-// CHECK-NEXT:    [[__BEGIN1:%.*]] = alloca %struct.B*, align 8
-// CHECK-NEXT:    [[__END1:%.*]] = alloca %struct.B*, align 8
+// CHECK-NEXT:    [[__RANGE1:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[__BEGIN1:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[__END1:%.*]] = alloca ptr, align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 1
-// CHECK-NEXT:    call void @_ZN1AC1Ev(%struct.A* noundef nonnull align 1 dereferenceable(1) [[A]])
-// CHECK-NEXT:    store [5 x %struct.B]* @array, [5 x %struct.B]** [[__RANGE1]], align 8
-// CHECK-NEXT:    store %struct.B* getelementptr inbounds ([5 x %struct.B], [5 x %struct.B]* @array, i64 0, i64 0), %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    store %struct.B* getelementptr inbounds ([5 x %struct.B], [5 x %struct.B]* @array, i64 1, i64 0), %struct.B** [[__END1]], align 8
+// CHECK-NEXT:    call void @_ZN1AC1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[A]])
+// CHECK-NEXT:    store ptr @array, ptr [[__RANGE1]], align 8
+// CHECK-NEXT:    store ptr @array, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    store ptr getelementptr inbounds ([[STRUCT_B]], ptr @array, i64 5), ptr [[__END1]], align 8
  // CHECK-NEXT:    br label [[FOR_COND:%.*]]
  // CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load %struct.B*, %struct.B** [[__END1]], align 8
-// CHECK-NEXT:    [[CMP:%.*]] = icmp ne %struct.B* [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__END1]], align 8
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[TMP0]], [[TMP1]]
  // CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
  // CHECK:       for.body:
-// CHECK-NEXT:    [[TMP2:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    call void @_ZN1BC1ERKS_(%struct.B* noundef nonnull align 1 dereferenceable(1) [[B]], %struct.B* noundef nonnull align 1 dereferenceable(1) [[TMP2]])
-// CHECK-NEXT:    call void @_ZN1BD1Ev(%struct.B* noundef nonnull align 1 dereferenceable(1) [[B]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    call void @_ZN1BC1ERKS_(ptr noundef nonnull align 1 dereferenceable(1) [[B]], ptr noundef nonnull align 1 dereferenceable(1) [[TMP2]])
+// CHECK-NEXT:    call void @_ZN1BD1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[B]]) #[[ATTR3:[0-9]+]]
  // CHECK-NEXT:    br label [[FOR_INC:%.*]]
  // CHECK:       for.inc:
-// CHECK-NEXT:    [[TMP3:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], %struct.B* [[TMP3]], i32 1
-// CHECK-NEXT:    store %struct.B* [[INCDEC_PTR]], %struct.B** [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], ptr [[TMP3]], i32 1
+// CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8
  // CHECK-NEXT:    br label [[FOR_COND]]
  // CHECK:       for.end:
-// CHECK-NEXT:    call void @_ZN1AD1Ev(%struct.A* noundef nonnull align 1 dereferenceable(1) [[A]]) #[[ATTR3]]
+// CHECK-NEXT:    call void @_ZN1AD1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[A]]) #[[ATTR3]]
  // CHECK-NEXT:    ret void
  //
  void for_array() {
@@ -70,44 +70,44 @@ void for_array() {
    }
  }
  
-// CHECK-LABEL: @_Z9for_rangev(
+// CHECK-LABEL: define {{[^@]+}}@_Z9for_rangev(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1
-// CHECK-NEXT:    [[__RANGE1:%.*]] = alloca %struct.C*, align 8
+// CHECK-NEXT:    [[__RANGE1:%.*]] = alloca ptr, align 8
  // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_C:%.*]], align 1
-// CHECK-NEXT:    [[__BEGIN1:%.*]] = alloca %struct.B*, align 8
-// CHECK-NEXT:    [[__END1:%.*]] = alloca %struct.B*, align 8
+// CHECK-NEXT:    [[__BEGIN1:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[__END1:%.*]] = alloca ptr, align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 1
-// CHECK-NEXT:    call void @_ZN1AC1Ev(%struct.A* noundef nonnull align 1 dereferenceable(1) [[A]])
-// CHECK-NEXT:    call void @_ZN1CC1Ev(%struct.C* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]])
-// CHECK-NEXT:    store %struct.C* [[REF_TMP]], %struct.C** [[__RANGE1]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.C*, %struct.C** [[__RANGE1]], align 8
-// CHECK-NEXT:    [[CALL:%.*]] = call noundef %struct.B* @_Z5beginR1C(%struct.C* noundef nonnull align 1 dereferenceable(1) [[TMP0]])
-// CHECK-NEXT:    store %struct.B* [[CALL]], %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load %struct.C*, %struct.C** [[__RANGE1]], align 8
-// CHECK-NEXT:    [[CALL1:%.*]] = call noundef %struct.B* @_Z3endR1C(%struct.C* noundef nonnull align 1 dereferenceable(1) [[TMP1]])
-// CHECK-NEXT:    store %struct.B* [[CALL1]], %struct.B** [[__END1]], align 8
+// CHECK-NEXT:    call void @_ZN1AC1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[A]])
+// CHECK-NEXT:    call void @_ZN1CC1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]])
+// CHECK-NEXT:    store ptr [[REF_TMP]], ptr [[__RANGE1]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__RANGE1]], align 8
+// CHECK-NEXT:    [[CALL:%.*]] = call noundef ptr @_Z5beginR1C(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]])
+// CHECK-NEXT:    store ptr [[CALL]], ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8
+// CHECK-NEXT:    [[CALL1:%.*]] = call noundef ptr @_Z3endR1C(ptr noundef nonnull align 1 dereferenceable(1) [[TMP1]])
+// CHECK-NEXT:    store ptr [[CALL1]], ptr [[__END1]], align 8
  // CHECK-NEXT:    br label [[FOR_COND:%.*]]
  // CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP2:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = load %struct.B*, %struct.B** [[__END1]], align 8
-// CHECK-NEXT:    [[CMP:%.*]] = icmp ne %struct.B* [[TMP2]], [[TMP3]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__END1]], align 8
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[TMP2]], [[TMP3]]
  // CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
  // CHECK:       for.cond.cleanup:
-// CHECK-NEXT:    call void @_ZN1CD1Ev(%struct.C* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) #[[ATTR3]]
+// CHECK-NEXT:    call void @_ZN1CD1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) #[[ATTR3]]
  // CHECK-NEXT:    br label [[FOR_END:%.*]]
  // CHECK:       for.body:
-// CHECK-NEXT:    [[TMP4:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    call void @_ZN1BC1ERKS_(%struct.B* noundef nonnull align 1 dereferenceable(1) [[B]], %struct.B* noundef nonnull align 1 dereferenceable(1) [[TMP4]])
-// CHECK-NEXT:    call void @_ZN1BD1Ev(%struct.B* noundef nonnull align 1 dereferenceable(1) [[B]]) #[[ATTR3]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    call void @_ZN1BC1ERKS_(ptr noundef nonnull align 1 dereferenceable(1) [[B]], ptr noundef nonnull align 1 dereferenceable(1) [[TMP4]])
+// CHECK-NEXT:    call void @_ZN1BD1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[B]]) #[[ATTR3]]
  // CHECK-NEXT:    br label [[FOR_INC:%.*]]
  // CHECK:       for.inc:
-// CHECK-NEXT:    [[TMP5:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], %struct.B* [[TMP5]], i32 1
-// CHECK-NEXT:    store %struct.B* [[INCDEC_PTR]], %struct.B** [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], ptr [[TMP5]], i32 1
+// CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8
  // CHECK-NEXT:    br label [[FOR_COND]]
  // CHECK:       for.end:
-// CHECK-NEXT:    call void @_ZN1AD1Ev(%struct.A* noundef nonnull align 1 dereferenceable(1) [[A]]) #[[ATTR3]]
+// CHECK-NEXT:    call void @_ZN1AD1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[A]]) #[[ATTR3]]
  // CHECK-NEXT:    ret void
  //
  void for_range() {
@@ -116,44 +116,44 @@ void for_range() {
    }
  }
  
-// CHECK-LABEL: @_Z16for_member_rangev(
+// CHECK-LABEL: define {{[^@]+}}@_Z16for_member_rangev(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1
-// CHECK-NEXT:    [[__RANGE1:%.*]] = alloca %struct.D*, align 8
+// CHECK-NEXT:    [[__RANGE1:%.*]] = alloca ptr, align 8
  // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_D:%.*]], align 1
-// CHECK-NEXT:    [[__BEGIN1:%.*]] = alloca %struct.B*, align 8
-// CHECK-NEXT:    [[__END1:%.*]] = alloca %struct.B*, align 8
+// CHECK-NEXT:    [[__BEGIN1:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[__END1:%.*]] = alloca ptr, align 8
  // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 1
-// CHECK-NEXT:    call void @_ZN1AC1Ev(%struct.A* noundef nonnull align 1 dereferenceable(1) [[A]])
-// CHECK-NEXT:    call void @_ZN1DC1Ev(%struct.D* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]])
-// CHECK-NEXT:    store %struct.D* [[REF_TMP]], %struct.D** [[__RANGE1]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.D*, %struct.D** [[__RANGE1]], align 8
-// CHECK-NEXT:    [[CALL:%.*]] = call noundef %struct.B* @_ZN1D5beginEv(%struct.D* noundef nonnull align 1 dereferenceable(1) [[TMP0]])
-// CHECK-NEXT:    store %struct.B* [[CALL]], %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load %struct.D*, %struct.D** [[__RANGE1]], align 8
-// CHECK-NEXT:    [[CALL1:%.*]] = call noundef %struct.B* @_ZN1D3endEv(%struct.D* noundef nonnull align 1 dereferenceable(1) [[TMP1]])
-// CHECK-NEXT:    store %struct.B* [[CALL1]], %struct.B** [[__END1]], align 8
+// CHECK-NEXT:    call void @_ZN1AC1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[A]])
+// CHECK-NEXT:    call void @_ZN1DC1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]])
+// CHECK-NEXT:    store ptr [[REF_TMP]], ptr [[__RANGE1]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__RANGE1]], align 8
+// CHECK-NEXT:    [[CALL:%.*]] = call noundef ptr @_ZN1D5beginEv(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]])
+// CHECK-NEXT:    store ptr [[CALL]], ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8
+// CHECK-NEXT:    [[CALL1:%.*]] = call noundef ptr @_ZN1D3endEv(ptr noundef nonnull align 1 dereferenceable(1) [[TMP1]])
+// CHECK-NEXT:    store ptr [[CALL1]], ptr [[__END1]], align 8
  // CHECK-NEXT:    br label [[FOR_COND:%.*]]
  // CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP2:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = load %struct.B*, %struct.B** [[__END1]], align 8
-// CHECK-NEXT:    [[CMP:%.*]] = icmp ne %struct.B* [[TMP2]], [[TMP3]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[__END1]], align 8
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[TMP2]], [[TMP3]]
  // CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
  // CHECK:       for.cond.cleanup:
-// CHECK-NEXT:    call void @_ZN1DD1Ev(%struct.D* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) #[[ATTR3]]
+// CHECK-NEXT:    call void @_ZN1DD1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) #[[ATTR3]]
  // CHECK-NEXT:    br label [[FOR_END:%.*]]
  // CHECK:       for.body:
-// CHECK-NEXT:    [[TMP4:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    call void @_ZN1BC1ERKS_(%struct.B* noundef nonnull align 1 dereferenceable(1) [[B]], %struct.B* noundef nonnull align 1 dereferenceable(1) [[TMP4]])
-// CHECK-NEXT:    call void @_ZN1BD1Ev(%struct.B* noundef nonnull align 1 dereferenceable(1) [[B]]) #[[ATTR3]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    call void @_ZN1BC1ERKS_(ptr noundef nonnull align 1 dereferenceable(1) [[B]], ptr noundef nonnull align 1 dereferenceable(1) [[TMP4]])
+// CHECK-NEXT:    call void @_ZN1BD1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[B]]) #[[ATTR3]]
  // CHECK-NEXT:    br label [[FOR_INC:%.*]]
  // CHECK:       for.inc:
-// CHECK-NEXT:    [[TMP5:%.*]] = load %struct.B*, %struct.B** [[__BEGIN1]], align 8
-// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], %struct.B* [[TMP5]], i32 1
-// CHECK-NEXT:    store %struct.B* [[INCDEC_PTR]], %struct.B** [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8
+// CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], ptr [[TMP5]], i32 1
+// CHECK-NEXT:    store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8
  // CHECK-NEXT:    br label [[FOR_COND]]
  // CHECK:       for.end:
-// CHECK-NEXT:    call void @_ZN1AD1Ev(%struct.A* noundef nonnull align 1 dereferenceable(1) [[A]]) #[[ATTR3]]
+// CHECK-NEXT:    call void @_ZN1AD1Ev(ptr noundef nonnull align 1 dereferenceable(1) [[A]]) #[[ATTR3]]
  // CHECK-NEXT:    ret void
  //
  void for_member_range() {
author	Nikita Popov <npopov@redhat.com>
	Mon, 12 Dec 2022 16:13:39 +0000 (17:13 +0100)
committer	Nikita Popov <npopov@redhat.com>
	Mon, 12 Dec 2022 16:24:40 +0000 (17:24 +0100)
clang/test/CodeGen/aarch64-ls64.c		patch \| blob \| history
clang/test/CodeGen/aarch64-neon-tbl.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sb.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sh.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1sw.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1ub.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uh.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1uw.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sb.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sh.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sw.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1ub.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uh.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1uw.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c		patch \| blob \| history
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c		patch \| blob \| history
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c		patch \| blob \| history
clang/test/CodeGen/clear_cache.c		patch \| blob \| history
clang/test/CodeGen/complex-strictfp.c		patch \| blob \| history
clang/test/CodeGenCXX/for-range.cpp		patch \| blob \| history