-// RUN: %clang_cc1 -triple i386-unknown-unknown -O1 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -O2 -emit-llvm -o - %s | FileCheck %s
// CHECK-LABEL: define i32 @f0()
// CHECK: ret i32 0
// CHECK-LABEL: define i32 @f1()
-// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
-// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
-// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=NATIVE
+// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
+// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=NATIVE
__fp16 g;
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \
-// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O1 %s \
+// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O2 %s \
// RUN: | FileCheck %s --check-prefix=CHECK-SOFT
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \
-// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O1 %s \
+// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O2 %s \
// RUN: | FileCheck %s --check-prefix=CHECK-HARD
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \
// RUN: -mfloat-abi hard -target-feature +neon -target-feature +fullfp16 \
-// RUN: -emit-llvm -o - -O1 %s \
+// RUN: -emit-llvm -o - -O2 %s \
// RUN: | FileCheck %s --check-prefix=CHECK-FULL
typedef float float32_t;
int *test_c11_atomic_fetch_add_int_ptr(_Atomic(int *) *p) {
// CHECK: test_c11_atomic_fetch_add_int_ptr
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5)
return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
}
int *test_c11_atomic_fetch_sub_int_ptr(_Atomic(int *) *p) {
// CHECK: test_c11_atomic_fetch_sub_int_ptr
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5)
return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
}
int test_c11_atomic_fetch_add_int(_Atomic(int) *p) {
// CHECK: test_c11_atomic_fetch_add_int
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5)
return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
}
int test_c11_atomic_fetch_sub_int(_Atomic(int) *p) {
// CHECK: test_c11_atomic_fetch_sub_int
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5)
return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
}
int *fp2a(int **p) {
// CHECK: @fp2a
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0)
// Note, the GNU builtins do not multiply by sizeof(T)!
return __atomic_fetch_sub(p, 4, memory_order_relaxed);
}
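// Illustrative only (not one of the FileCheck'd functions, hypothetical names,
// assuming 32-bit int): a side-by-side sketch of the scaling difference noted
// above. The C11 builtin scales the operand like pointer arithmetic (3 becomes
// i32 12 in the checks), the GNU builtin uses the raw byte count (4 stays i32 4).
int *sketch_scaling(_Atomic(int *) *c11p, int **gnup) {
  __c11_atomic_fetch_add(c11p, 3, memory_order_seq_cst);    // advances by 3 ints = 12 bytes
  return __atomic_fetch_sub(gnup, 4, memory_order_relaxed); // moves back exactly 4 bytes
}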
int test_atomic_fetch_add(int *p) {
// CHECK: test_atomic_fetch_add
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5)
return __atomic_fetch_add(p, 55, memory_order_seq_cst);
}
int test_atomic_fetch_sub(int *p) {
// CHECK: test_atomic_fetch_sub
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5)
return __atomic_fetch_sub(p, 55, memory_order_seq_cst);
}
int test_atomic_fetch_and(int *p) {
// CHECK: test_atomic_fetch_and
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5)
return __atomic_fetch_and(p, 55, memory_order_seq_cst);
}
int test_atomic_fetch_or(int *p) {
// CHECK: test_atomic_fetch_or
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5)
return __atomic_fetch_or(p, 55, memory_order_seq_cst);
}
int test_atomic_fetch_xor(int *p) {
// CHECK: test_atomic_fetch_xor
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5)
return __atomic_fetch_xor(p, 55, memory_order_seq_cst);
}
int test_atomic_fetch_nand(int *p) {
// CHECK: test_atomic_fetch_nand
- // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5)
return __atomic_fetch_nand(p, 55, memory_order_seq_cst);
}
int test_atomic_add_fetch(int *p) {
// CHECK: test_atomic_add_fetch
- // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5)
// CHECK: {{%[^ ]*}} = add i32 [[CALL]], 55
return __atomic_add_fetch(p, 55, memory_order_seq_cst);
}
int test_atomic_sub_fetch(int *p) {
// CHECK: test_atomic_sub_fetch
- // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5)
// CHECK: {{%[^ ]*}} = add i32 [[CALL]], -55
return __atomic_sub_fetch(p, 55, memory_order_seq_cst);
}
int test_atomic_and_fetch(int *p) {
// CHECK: test_atomic_and_fetch
- // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5)
// CHECK: {{%[^ ]*}} = and i32 [[CALL]], 55
return __atomic_and_fetch(p, 55, memory_order_seq_cst);
}
int test_atomic_or_fetch(int *p) {
// CHECK: test_atomic_or_fetch
- // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5)
// CHECK: {{%[^ ]*}} = or i32 [[CALL]], 55
return __atomic_or_fetch(p, 55, memory_order_seq_cst);
}
int test_atomic_xor_fetch(int *p) {
// CHECK: test_atomic_xor_fetch
- // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5)
// CHECK: {{%[^ ]*}} = xor i32 [[CALL]], 55
return __atomic_xor_fetch(p, 55, memory_order_seq_cst);
}
int test_atomic_nand_fetch(int *p) {
// CHECK: test_atomic_nand_fetch
- // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+ // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5)
// FIXME: We should not be checking optimized IR. It changes independently of clang.
// FIXME-CHECK: [[AND:%[^ ]*]] = and i32 [[CALL]], 55
// FIXME-CHECK: {{%[^ ]*}} = xor i32 [[AND]], -1
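  // For reference: __atomic_nand_fetch computes ~(*p & val) and returns the new
  // value, while the __atomic_fetch_nand_4 libcall returns the old value, so the
  // optimized IR rebuilds the result from the call with an 'and' followed by an
  // 'xor ..., -1', which is what the FIXME-CHECK lines above sketch.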
// CHECK-LABEL: define void @_Z11atomic_initR1Ai
void atomic_init(A& a, int i) {
// CHECK-NOT: atomic
- // CHECK: tail call void @_ZN1BC1Ei
+ // CHECK: call void @_ZN1BC1Ei
__c11_atomic_init(&b, B(i));
// CHECK-NEXT: ret void
}
// ZERO-LABEL: @test_smallpartinit_uninit()
// ZERO-O0: call void @llvm.memset{{.*}}, i8 0,
// ZERO-O1-LEGACY: store i16 0, i16* %uninit, align 2
-// ZERO-O1-NEWPM: store i16 42, i16* %uninit, align 2
+// ZERO-O1-NEWPM: store i16 0, i16* %uninit, align 2
TEST_BRACES(smallpartinit, smallpartinit);
// CHECK-LABEL: @test_smallpartinit_braces()
// PATTERN-LABEL: @test_paddednullinit_uninit()
// PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_paddednullinit_uninit.uninit
// PATTERN-O1-LEGACY: store i64 [[I64]], i64* %uninit, align 8
-// PATTERN-O1-NEWPM: store i64 2863311360, i64* %uninit, align 8
+// PATTERN-O1-NEWPM: store i64 [[I64]], i64* %uninit, align 8
// ZERO-LABEL: @test_paddednullinit_uninit()
// ZERO-O0: call void @llvm.memset{{.*}}, i8 0,
// ZERO-O1: store i64 0, i64* %uninit, align 8
// ZERO-LABEL: @test_virtualderived_uninit()
// ZERO-O0: call void @llvm.memset{{.*}}, i8 0,
// ZERO-O1-LEGACY: call void @llvm.memset{{.*}}, i8 0,
-// ZERO-O1-NEWPM: [[FIELD1:%.*]] = getelementptr inbounds %struct.virtualderived, %struct.virtualderived* %uninit, i64 0, i32 1, i32 0, i32 0
-// ZERO-O1-NEWPM: [[FIELD0:%.*]] = getelementptr inbounds %struct.virtualderived, %struct.virtualderived* %uninit, i64 0, i32 0, i32 0
-// ZERO-O1-NEWPM: store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*], [5 x i8*] }, { [7 x i8*], [5 x i8*] }* @_ZTV14virtualderived, i64 0, inrange i32 0, i64 5) to i32 (...)**), i32 (...)*** [[FIELD0]], align 8
-// ZERO-O1-NEWPM: store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*], [5 x i8*] }, { [7 x i8*], [5 x i8*] }* @_ZTV14virtualderived, i64 0, inrange i32 1, i64 3) to i32 (...)**), i32 (...)*** [[FIELD1]], align 8
+// ZERO-O1-NEWPM: call void @llvm.memset{{.*}}, i8 0,
TEST_BRACES(virtualderived, virtualderived);
// CHECK-LABEL: @test_virtualderived_braces()
if (pred) {
// DISCARDVALUE: 2:
- // DISCARDVALUE-NEXT: tail call void @branch()
+ // DISCARDVALUE-NEXT: call void @branch()
// DISCARDVALUE-NEXT: br label %3
// CHECK: if.then:
- // CHECK-NEXT: tail call void @branch()
+ // CHECK-NEXT: call void @branch()
// CHECK-NEXT: br label %if.end
branch();
}
T* test1(V* x) { return &dynamic_cast<T&>(*x); }
// CHECK-LABEL: define dso_local %struct.T* @"?test1@@YAPAUT@@PAUV@@@Z"(%struct.V* %x)
// CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8*
-// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1)
+// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1)
// CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T*
// CHECK-NEXT: ret %struct.T* [[RET]]
// CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1
// CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4
// CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[VBOFFS]]
-// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[VBOFFS]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1)
+// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[VBOFFS]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1)
// CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T*
// CHECK-NEXT: ret %struct.T* [[RET]]
// CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4
// CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4
// CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[DELTA]]
-// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1)
+// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1)
// CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T*
// CHECK-NEXT: ret %struct.T* [[RET]]
T* test4(V* x) { return dynamic_cast<T*>(x); }
// CHECK-LABEL: define dso_local %struct.T* @"?test4@@YAPAUT@@PAUV@@@Z"(%struct.V* %x)
// CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8*
-// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0)
+// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0)
// CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T*
// CHECK-NEXT: ret %struct.T* [[RET]]
// CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1
// CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4
// CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[VBOFFS]]
-// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* nonnull [[ADJ]], i32 [[VBOFFS]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0)
+// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* nonnull [[ADJ]], i32 [[VBOFFS]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0)
// CHECK-NEXT: [[RES:%.*]] = bitcast i8* [[CALL]] to %struct.T*
// CHECK-NEXT: br label
// CHECK: [[RET:%.*]] = phi %struct.T*
// CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4
// CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4
// CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]]
-// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0)
+// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0)
// CHECK-NEXT: [[RES:%.*]] = bitcast i8* [[CALL]] to %struct.T*
// CHECK-NEXT: br label
// CHECK: [[RET:%.*]] = phi %struct.T*
void* test7(V* x) { return dynamic_cast<void*>(x); }
// CHECK-LABEL: define dso_local i8* @"?test7@@YAPAXPAUV@@@Z"(%struct.V* %x)
// CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8*
-// CHECK-NEXT: [[RET:%.*]] = tail call i8* @__RTCastToVoid(i8* [[CAST]])
+// CHECK-NEXT: [[RET:%.*]] = call i8* @__RTCastToVoid(i8* [[CAST]])
// CHECK-NEXT: ret i8* [[RET]]
void* test8(A* x) { return dynamic_cast<void*>(x); }
// CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1
// CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4
// CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[VBOFFS]]
-// CHECK-NEXT: [[RES:%.*]] = tail call i8* @__RTCastToVoid(i8* nonnull [[ADJ]])
+// CHECK-NEXT: [[RES:%.*]] = call i8* @__RTCastToVoid(i8* nonnull [[ADJ]])
// CHECK-NEXT: br label
// CHECK: [[RET:%.*]] = phi i8*
// CHECK-NEXT: ret i8* [[RET]]
// CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4
// CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4
// CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]]
-// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTCastToVoid(i8* [[ADJ]])
+// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTCastToVoid(i8* [[ADJ]])
// CHECK-NEXT: br label
// CHECK: [[RET:%.*]] = phi i8*
// CHECK-NEXT: ret i8* [[RET]]
const std::type_info* test3_typeid() { return &typeid(*fn()); }
// CHECK-LABEL: define dso_local %struct.type_info* @"?test3_typeid@@YAPBUtype_info@@XZ"()
-// CHECK: [[CALL:%.*]] = tail call %struct.A* @"?fn@@YAPAUA@@XZ"()
+// CHECK: [[CALL:%.*]] = call %struct.A* @"?fn@@YAPAUA@@XZ"()
// CHECK-NEXT: [[CMP:%.*]] = icmp eq %struct.A* [[CALL]], null
// CHECK-NEXT: br i1 [[CMP]]
-// CHECK: tail call i8* @__RTtypeid(i8* null)
+// CHECK: call i8* @__RTtypeid(i8* null)
// CHECK-NEXT: unreachable
// CHECK: [[THIS:%.*]] = bitcast %struct.A* [[CALL]] to i8*
// CHECK-NEXT: [[VBTBLP:%.*]] = getelementptr %struct.A, %struct.A* [[CALL]], i32 0, i32 0
// CHECK-NEXT: [[VBSLOT:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1
// CHECK-NEXT: [[VBASE_OFFS:%.*]] = load i32, i32* [[VBSLOT]], align 4
// CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[THIS]], i32 [[VBASE_OFFS]]
-// CHECK-NEXT: [[RT:%.*]] = tail call i8* @__RTtypeid(i8* nonnull [[ADJ]])
+// CHECK-NEXT: [[RT:%.*]] = call i8* @__RTtypeid(i8* nonnull [[ADJ]])
// CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info*
// CHECK-NEXT: ret %struct.type_info* [[RET]]
const std::type_info* test5_typeid() { return &typeid(v); }
// CHECK: define dso_local %struct.type_info* @"?test5_typeid@@YAPBUtype_info@@XZ"()
-// CHECK: [[RT:%.*]] = tail call i8* @__RTtypeid(i8* bitcast (%struct.V* @"?v@@3UV@@A" to i8*))
+// CHECK: [[RT:%.*]] = call i8* @__RTtypeid(i8* bitcast (%struct.V* @"?v@@3UV@@A" to i8*))
// CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info*
// CHECK-NEXT: ret %struct.type_info* [[RET]]
// CHECK-LABEL: define void @_Z5test1b(
// CHECK-EH-LABEL: define void @_Z5test1b(
X test1(bool B) {
- // CHECK: tail call {{.*}} @_ZN1XC1Ev
+ // CHECK: call {{.*}} @_ZN1XC1Ev
// CHECK-NEXT: ret void
X x;
if (B)
return (x);
return x;
- // CHECK-EH: tail call {{.*}} @_ZN1XC1Ev
+ // CHECK-EH: call {{.*}} @_ZN1XC1Ev
// CHECK-EH-NEXT: ret void
}
// CHECK-LABEL: define void @_Z5test3b
X test3(bool B) {
- // CHECK: tail call {{.*}} @_ZN1XC1Ev
+ // CHECK: call {{.*}} @_ZN1XC1Ev
// CHECK-NOT: call {{.*}} @_ZN1XC1ERKS_
// CHECK: call {{.*}} @_ZN1XC1Ev
// CHECK: call {{.*}} @_ZN1XC1ERKS_
// CHECK-LABEL: define void @_Z5test4b
X test4(bool B) {
{
- // CHECK: tail call {{.*}} @_ZN1XC1Ev
+ // CHECK: call {{.*}} @_ZN1XC1Ev
X x;
// CHECK: br i1
if (B)
return x;
}
- // CHECK: tail call {{.*}} @_ZN1XD1Ev
- // CHECK: tail call void @exit(i32 1)
+ // CHECK: call {{.*}} @_ZN1XD1Ev
+ // CHECK: call void @exit(i32 1)
exit(1);
}
// CHECK-LABEL: define void @_Z5test7b
X test7(bool b) {
- // CHECK: tail call {{.*}} @_ZN1XC1Ev
+ // CHECK: call {{.*}} @_ZN1XC1Ev
// CHECK-NEXT: ret
if (b) {
X x;
// CHECK-LABEL: define void @_Z5test8b
X test8(bool b) {
- // CHECK: tail call {{.*}} @_ZN1XC1Ev
+ // CHECK: call {{.*}} @_ZN1XC1Ev
// CHECK-NEXT: ret
if (b) {
X x;
}
// CHECK-LABEL: define linkonce_odr void @_ZN1YIiE1fEv
-// CHECK: tail call {{.*}} @_ZN1YIiEC1Ev
+// CHECK: call {{.*}} @_ZN1YIiEC1Ev
// CHECK-EH-03: attributes [[NR_NUW]] = { noreturn nounwind }
-// RUN: %clang_cc1 -triple armv7-unknown-linux-gnueabihf %s -o - -emit-llvm -O1 | FileCheck %s
+// RUN: %clang_cc1 -triple armv7-unknown-linux-gnueabihf %s -o - -emit-llvm -O2 | FileCheck %s
// Stack should be reused when possible; there is no need to allocate two
// separate slots if they have disjoint lifetimes.
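// Illustrative sketch of that property (hypothetical scopes mirroring the
// test's def/use helpers): 'a' and 'b' are never live at the same time, so the
// lifetime markers emitted for them allow stack coloring in the backend to
// fold both locals into a single slot.
//   { one_field a = def_one_field(); use(a); } // a's lifetime ends here
//   { one_field b = def_one_field(); use(b); } // b may reuse a's slot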
// CHECK: define double @_Z7forward9one_field(double returned %{{.*}})
//
// CHECK: define void @_Z14test_one_fieldv()
-// CHECK: %[[call:.*]] = tail call double @_Z13def_one_fieldv()
-// CHECK: tail call void @_Z3use9one_field(double %[[call]])
+// CHECK: %[[call:.*]] = call double @_Z13def_one_fieldv()
+// CHECK: call void @_Z3use9one_field(double %[[call]])
// CHECK: ret void
//
// CHECK: declare void @_Z3use9one_field(double)
// CHECK: define void @_Z7forward5empty()
//
// CHECK: define void @_Z10test_emptyv()
-// CHECK: tail call void @_Z9def_emptyv()
-// CHECK: tail call void @_Z3use5empty()
+// CHECK: call void @_Z9def_emptyv()
+// CHECK: call void @_Z3use5empty()
// CHECK: ret void
//
// CHECK: declare void @_Z3use5empty()
// CHECK: define i32 @_Z7forward12one_bitfield(i32 returned %{{.*}})
//
// CHECK: define void @_Z17test_one_bitfieldv()
-// CHECK: %[[call:.*]] = tail call i32 @_Z16def_one_bitfieldv()
-// CHECK: tail call void @_Z3use12one_bitfield(i32 %[[call]])
+// CHECK: %[[call:.*]] = call i32 @_Z16def_one_bitfieldv()
+// CHECK: call void @_Z3use12one_bitfield(i32 %[[call]])
// CHECK: ret void
//
// CHECK: declare void @_Z3use12one_bitfield(i32)
// CHECK: call void @__clang_call_terminate(
// CHECK-O1-LABEL: define linkonce_odr hidden void @__copy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E(
-// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release
+// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release
// CHECK-NOEXCP: define linkonce_odr hidden void @__copy_helper_block_8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E(
// CHECK: define linkonce_odr hidden void @__destroy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E(
// CHECK: call void @__clang_call_terminate(
// CHECK-O1-LABEL: define linkonce_odr hidden void @__destroy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E(
-// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release
-// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release
+// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release
+// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release
// CHECK-NOEXCP: define linkonce_odr hidden void @__destroy_helper_block_8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E(
namespace {
// CHECK: define internal void @"\01-[NRVO getNRVO]"
- (X)getNRVO {
X x;
- // CHECK: tail call void @_ZN1XC1Ev
+ // CHECK: call void @_ZN1XC1Ev
// CHECK-NEXT: ret void
return x;
}
return ^{
// CHECK-LABEL: define internal void @___Z10blocksNRVOv_block_invoke
X x;
- // CHECK: tail call void @_ZN1XC1Ev
+ // CHECK: call void @_ZN1XC1Ev
// CHECK-NEXT: ret void
return x;
}() ;
-// Test CF+LF are properly handled along with quoted, multi-line #error\r
-// RUN: %clang_cc1 -DOTHER -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s\r
-\r
-#ifndef TEST\r
-#error "message \\r
- more message \\r
- even more"\r
-#endif\r
-\r
-#ifdef OTHER\r
-#include <string>\r
-#endif\r
-\r
-// CHECK: #ifdef OTHER\r
-// CHECK-NEXT: #include <string>\r
-// CHECK-NEXT: #endif\r
+// Test that CR+LF line endings are properly handled along with quoted, multi-line #error
+// RUN: %clang_cc1 -DOTHER -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s
+
+#ifndef TEST
+#error "message \
+ more message \
+ even more"
+#endif
+
+#ifdef OTHER
+#include <string>
+#endif
+
+// CHECK: #ifdef OTHER
+// CHECK-NEXT: #include <string>
+// CHECK-NEXT: #endif
-// RUN: %clang_cc1 -x c++-header -triple x86_64-apple-darwin11 -emit-pch -O1 -fblocks -fno-escaping-block-tail-calls -o %t %S/no-escaping-block-tail-calls.h
-// RUN: %clang_cc1 -triple x86_64-apple-darwin11 -include-pch %t -emit-llvm -O1 -fblocks -fno-escaping-block-tail-calls -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++-header -triple x86_64-apple-darwin11 -emit-pch -O2 -fblocks -fno-escaping-block-tail-calls -o %t %S/no-escaping-block-tail-calls.h
+// RUN: %clang_cc1 -triple x86_64-apple-darwin11 -include-pch %t -emit-llvm -O2 -fblocks -fno-escaping-block-tail-calls -o - %s | FileCheck %s
// Check that -fno-escaping-block-tail-calls doesn't disable tail-call
// optimization if the block is non-escaping.
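// A minimal sketch of the distinction, with hypothetical names (the real
// declarations live in no-escaping-block-tail-calls.h): a block passed only to
// a __attribute__((noescape)) parameter is non-escaping, so
// -fno-escaping-block-tail-calls does not mark its invoke function with the
// "disable-tail-calls" attribute, and calls in the block body may still be
// tail-call optimized.
//   void takes_noescape(__attribute__((noescape)) void (^cb)(void));
//   void g(void);
//   void caller(void) { takes_noescape(^{ g(); }); } // g() can become a tail call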
/// Optimize quickly without destroying debuggability.
///
- /// FIXME: The current and historical behavior of this level does *not*
- /// agree with this goal, but we would like to move toward this goal in the
- /// future.
- ///
/// This level is tuned to produce a result from the optimizer as quickly
/// as possible and to avoid destroying debuggability. This tends to result
/// in a very good development mode where the compiled code will be
/// immediately executed as part of testing. As a consequence, where
/// possible, we would like to produce efficient-to-execute code, but not
/// if it significantly slows down compilation or would prevent even basic
/// debugging of the resulting binary.
///
/// As an example, complex loop transformations such as versioning,
- /// vectorization, or fusion might not make sense here due to the degree to
- /// which the executed code would differ from the source code, and the
- /// potential compile time cost.
+ /// vectorization, or fusion don't make sense here due to the degree to
+ /// which the executed code differs from the source code, and the compile time
+ /// cost.
O1,
/// Optimize for fast execution as much as possible without triggering
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
// Hoisting of scalars and load expressions.
- if (EnableGVNHoist)
- FPM.addPass(GVNHoistPass());
-
- // Global value numbering based sinking.
- if (EnableGVNSink) {
- FPM.addPass(GVNSinkPass());
- FPM.addPass(SimplifyCFGPass());
+ if (Level > O1) {
+ if (EnableGVNHoist)
+ FPM.addPass(GVNHoistPass());
+
+ // Global value numbering based sinking.
+ if (EnableGVNSink) {
+ FPM.addPass(GVNSinkPass());
+ FPM.addPass(SimplifyCFGPass());
+ }
}
// Speculative execution if the target has divergent branches; otherwise nop.
- FPM.addPass(SpeculativeExecutionPass());
+ if (Level > O1) {
+ FPM.addPass(SpeculativeExecutionPass());
- // Optimize based on known information about branches, and cleanup afterward.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
+ // Optimize based on known information about branches, and cleanup afterward.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ }
FPM.addPass(SimplifyCFGPass());
if (Level == O3)
FPM.addPass(AggressiveInstCombinePass());
// For PGO use pipeline, try to optimize memory intrinsics such as memcpy
// using the size value profile. Don't perform this when optimizing for size.
if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
- !isOptimizingForSize(Level))
+ !isOptimizingForSize(Level) && Level > O1)
FPM.addPass(PGOMemOPSizeOpt());
- FPM.addPass(TailCallElimPass());
+ // TODO: Investigate the cost/benefit of tail call elimination on debugging.
+ if (Level > O1)
+ FPM.addPass(TailCallElimPass());
FPM.addPass(SimplifyCFGPass());
// Form canonically associated expression trees, and simplify the trees using
// Rotate Loop - disable header duplication at -Oz
LPM1.addPass(LoopRotatePass(Level != Oz));
+ // TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(IndVarSimplifyPass());
// Re-consider control flow based optimizations after redundancy elimination,
// redo DCE, etc.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, DebugLogging));
+ if (Level > O1) {
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(DSEPass());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ EnableMSSALoopDependency, DebugLogging));
+ }
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
+ // TODO: Investigate if this is too expensive.
FPM.addPass(ADCEPass());
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
legacy::PassManagerBase &MPM) {
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
+ assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!");
MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
- if (EnableGVNHoist)
- MPM.add(createGVNHoistPass());
- if (EnableGVNSink) {
- MPM.add(createGVNSinkPass());
- MPM.add(createCFGSimplificationPass());
+
+ if (OptLevel > 1) {
+ if (EnableGVNHoist)
+ MPM.add(createGVNHoistPass());
+ if (EnableGVNSink) {
+ MPM.add(createGVNSinkPass());
+ MPM.add(createCFGSimplificationPass());
+ }
}
- // Speculative execution if the target has divergent branches; otherwise nop.
- MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
- MPM.add(createJumpThreadingPass()); // Thread jumps.
- MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+ if (OptLevel > 1) {
+ // Speculative execution if the target has divergent branches; otherwise nop.
+ MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
+
+ MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+ }
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Combine silly seq's
if (OptLevel > 2)
if (SizeLevel == 0)
MPM.add(createPGOMemOPSizeOptLegacyPass());
- MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ // TODO: Investigate the cost/benefit of tail call elimination on debugging.
+ if (OptLevel > 1)
+ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
// Begin the loop pass pipeline.
}
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+ // TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
if (EnableSimpleLoopUnswitch)
MPM.add(createSimpleLoopUnswitchLegacyPass());
// opened up by them.
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createJumpThreadingPass()); // Thread jumps
- MPM.add(createCorrelatedValuePropagationPass());
- MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ if (OptLevel > 1) {
+ MPM.add(createJumpThreadingPass()); // Thread jumps
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ }
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
if (RerollLoops)
MPM.add(createLoopRerollPass());
+ // TODO: Investigate if this is too expensive at O1.
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Clean up after everything.
// LTO provides additional opportunities for tailcall elimination due to
// link-time inlining, and visibility of nocapture attribute.
- PM.add(createTailCallEliminationPass());
+ if (OptLevel > 1)
+ PM.add(createTailCallEliminationPass());
// Infer attributes on declarations, call sites, arguments, etc.
PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
-; GCN-POSTLINK: tail call fast float @_Z3sinf(
-; GCN-POSTLINK: tail call fast float @_Z3cosf(
+; GCN-POSTLINK: call fast float @_Z3sinf(
+; GCN-POSTLINK: call fast float @_Z3cosf(
; GCN-PRELINK: call fast float @_Z6sincosfPf(
-; GCN-NATIVE: tail call fast float @_Z10native_sinf(
-; GCN-NATIVE: tail call fast float @_Z10native_cosf(
+; GCN-NATIVE: call fast float @_Z10native_sinf(
+; GCN-NATIVE: call fast float @_Z10native_cosf(
define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3sinf(float %tmp)
+ %call = call fast float @_Z3sinf(float %tmp)
store float %call, float addrspace(1)* %a, align 4
- %call2 = tail call fast float @_Z3cosf(float %tmp)
+ %call2 = call fast float @_Z3cosf(float %tmp)
%arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
store float %call2, float addrspace(1)* %arrayidx3, align 4
ret void
declare float @_Z3cosf(float)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
-; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f(
-; GCN-POSTLINK: tail call fast <2 x float> @_Z3cosDv2_f(
+; GCN-POSTLINK: call fast <2 x float> @_Z3sinDv2_f(
+; GCN-POSTLINK: call fast <2 x float> @_Z3cosDv2_f(
; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_(
-; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f(
-; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f(
+; GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f(
+; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f(
define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) {
entry:
%tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8
- %call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
+ %call = call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8
- %call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
+ %call2 = call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
%arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1
store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8
ret void
declare <2 x float> @_Z3cosDv2_f(<2 x float>)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
-; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f(
-; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f(
+; GCN-POSTLINK: call fast <3 x float> @_Z3sinDv3_f(
+; GCN-POSTLINK: call fast <3 x float> @_Z3cosDv3_f(
; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_(
-; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f(
-; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f(
+; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f(
+; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f(
define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) {
entry:
%castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)*
%loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16
%extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
+ %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
%extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16
- %call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
+ %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
%arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1
%extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)*
declare <3 x float> @_Z3cosDv3_f(<3 x float>)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
-; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f(
-; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f(
+; GCN-POSTLINK: call fast <4 x float> @_Z3sinDv4_f(
+; GCN-POSTLINK: call fast <4 x float> @_Z3cosDv4_f(
; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_(
-; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f(
-; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f(
+; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f(
+; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f(
define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) {
entry:
%tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
- %call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
+ %call = call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16
- %call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
+ %call2 = call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
%arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1
store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16
ret void
declare <4 x float> @_Z3cosDv4_f(<4 x float>)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
-; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f(
-; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f(
+; GCN-POSTLINK: call fast <8 x float> @_Z3sinDv8_f(
+; GCN-POSTLINK: call fast <8 x float> @_Z3cosDv8_f(
; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_(
-; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f(
-; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f(
+; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f(
+; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f(
define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) {
entry:
%tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32
- %call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
+ %call = call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32
- %call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
+ %call2 = call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
%arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1
store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32
ret void
declare <8 x float> @_Z3cosDv8_f(<8 x float>)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
-; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f(
-; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f(
+; GCN-POSTLINK: call fast <16 x float> @_Z3sinDv16_f(
+; GCN-POSTLINK: call fast <16 x float> @_Z3cosDv16_f(
; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_(
-; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f(
-; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f(
+; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f(
+; GCN-NATIVE: call fast <16 x float> @_Z10native_cosDv16_f(
define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) {
entry:
%tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64
- %call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
+ %call = call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64
- %call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
+ %call2 = call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
%arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1
store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64
ret void
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) {
entry:
- %call = tail call fast float @_Z12native_recipf(float 3.000000e+00)
+ %call = call fast float @_Z12native_recipf(float 3.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) {
entry:
- %call = tail call fast float @_Z10half_recipf(float 3.000000e+00)
+ %call = call fast float @_Z10half_recipf(float 3.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
+ %call = call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
+ %call = call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
+ %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
+ %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
+ %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
+ %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
+ %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
+ %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
+ %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
+ %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
-; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
-; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
+; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01)
+; GCN-PRELINK: %__pow2sqrt = call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) {
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
+ %call = call fast float @_Z3powff(float %tmp, float 5.000000e-01)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
-; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
-; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
+; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float -5.000000e-01)
+; GCN-PRELINK: %__pow2rsqrt = call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) {
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
+ %call = call fast float @_Z3powff(float %tmp, float -5.000000e-01)
store float %call, float addrspace(1)* %a, align 4
ret void
}
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01)
+ %call = call fast float @_Z3powff(float %tmp, float 1.100000e+01)
store float %call, float addrspace(1)* %a, align 4
ret void
}
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
+ %call = call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
store float %call, float addrspace(1)* %a, align 4
ret void
}
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z4pownfi(float %tmp, i32 11)
+ %call = call fast float @_Z4pownfi(float %tmp, i32 11)
store float %call, float addrspace(1)* %a, align 4
ret void
}
declare float @_Z4pownfi(float, i32)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
-; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
-; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
-; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
+; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 1.013000e+03)
+; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp)
+; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
-; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
+; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
+ %call = call fast float @_Z3powff(float %tmp, float 1.013000e+03)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
-; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1)
-; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp)
+; GCN-POSTLINK: call fast float @_Z4powrff(float %tmp, float %tmp1)
+; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %tmp)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
-; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
+; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4
-; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
+; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
-; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
+; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
%arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
- %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
+ %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
-; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
+; GCN-POSTLINK: call fast float @_Z4pownfi(float %tmp, i32 %conv)
; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
-; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
-; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
+; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp)
+; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
-; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
+; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %__yeven = shl i32 %conv, 31
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
%arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
%conv = fptosi float %tmp1 to i32
- %call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
+ %call = call fast float @_Z4pownfi(float %tmp, i32 %conv)
store float %call, float addrspace(1)* %a, align 4
ret void
}
entry:
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
- %call = tail call fast float @_Z5rootnfi(float %tmp, i32 1)
+ %call = call fast float @_Z5rootnfi(float %tmp, i32 1)
store float %call, float addrspace(1)* %a, align 4
ret void
}
declare float @_Z5rootnfi(float, i32)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
-; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2)
-; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
+; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 2)
+; GCN-PRELINK: %__rootn2sqrt = call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z5rootnfi(float %tmp, i32 2)
+ %call = call fast float @_Z5rootnfi(float %tmp, i32 2)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
-; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3)
-; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp)
+; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 3)
+; GCN-PRELINK: %__rootn2cbrt = call fast float @_Z4cbrtf(float %tmp)
define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z5rootnfi(float %tmp, i32 3)
+ %call = call fast float @_Z5rootnfi(float %tmp, i32 3)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1)
+ %call = call fast float @_Z5rootnfi(float %tmp, i32 -1)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
-; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
-; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
+; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 -2)
+; GCN-PRELINK: %__rootn2rsqrt = call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
+ %call = call fast float @_Z5rootnfi(float %tmp, i32 -2)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
+ %call = call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
+ %call = call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
+ %call = call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
+ %call = call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
+ %call = call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
store float %call, float addrspace(1)* %a, align 4
ret void
}
define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
+ %call = call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
store float %call, float addrspace(1)* %a, align 4
ret void
}
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp = load float, float addrspace(1)* %arrayidx, align 4
%tmp1 = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
+ %call = call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
-; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp)
+; GCN-NATIVE: call fast float @_Z10native_expf(float %tmp)
define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3expf(float %tmp)
+ %call = call fast float @_Z3expf(float %tmp)
store float %call, float addrspace(1)* %a, align 4
ret void
}
declare float @_Z3expf(float)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
-; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp)
+; GCN-NATIVE: call fast float @_Z11native_exp2f(float %tmp)
define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z4exp2f(float %tmp)
+ %call = call fast float @_Z4exp2f(float %tmp)
store float %call, float addrspace(1)* %a, align 4
ret void
}
declare float @_Z4exp2f(float)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
-; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp)
+; GCN-NATIVE: call fast float @_Z12native_exp10f(float %tmp)
define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z5exp10f(float %tmp)
+ %call = call fast float @_Z5exp10f(float %tmp)
store float %call, float addrspace(1)* %a, align 4
ret void
}
declare float @_Z5exp10f(float)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
-; GCN-NATIVE: tail call fast float @_Z10native_logf(float %tmp)
+; GCN-NATIVE: call fast float @_Z10native_logf(float %tmp)
define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3logf(float %tmp)
+ %call = call fast float @_Z3logf(float %tmp)
store float %call, float addrspace(1)* %a, align 4
ret void
}
declare float @_Z3logf(float)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
-; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp)
+; GCN-NATIVE: call fast float @_Z11native_log2f(float %tmp)
define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z4log2f(float %tmp)
+ %call = call fast float @_Z4log2f(float %tmp)
store float %call, float addrspace(1)* %a, align 4
ret void
}
declare float @_Z4log2f(float)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
-; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp)
+; GCN-NATIVE: call fast float @_Z12native_log10f(float %tmp)
define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z5log10f(float %tmp)
+ %call = call fast float @_Z5log10f(float %tmp)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
-; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
+; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
-; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
+; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
%arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
- %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
+ %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
store float %call, float addrspace(1)* %a, align 4
ret void
}
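; (Not part of the original test.) The GCN-NATIVE checks above follow the usual
; powr rewrite powr(x, y) = 2^(y*log2(x)) for x > 0: native_log2 of %tmp, an
; fmul by %tmp1, then native_exp2 of the product. Quick sanity check:
; powr(8.0, 2.0) -> native_exp2(2.0 * native_log2(8.0)) = native_exp2(6.0) = 64.0.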
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
-; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp)
+; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z4sqrtf(float %tmp)
+ %call = call fast float @_Z4sqrtf(float %tmp)
store float %call, float addrspace(1)* %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
-; GCN: tail call fast double @_Z4sqrtd(double %tmp)
+; GCN: call fast double @_Z4sqrtd(double %tmp)
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
entry:
%tmp = load double, double addrspace(1)* %a, align 8
- %call = tail call fast double @_Z4sqrtd(double %tmp)
+ %call = call fast double @_Z4sqrtd(double %tmp)
store double %call, double addrspace(1)* %a, align 8
ret void
}
declare double @_Z4sqrtd(double)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
-; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp)
+; GCN-NATIVE: call fast float @_Z12native_rsqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z5rsqrtf(float %tmp)
+ %call = call fast float @_Z5rsqrtf(float %tmp)
store float %call, float addrspace(1)* %a, align 4
ret void
}
declare float @_Z5rsqrtf(float)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
-; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp)
+; GCN-NATIVE: call fast float @_Z10native_tanf(float %tmp)
define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
- %call = tail call fast float @_Z3tanf(float %tmp)
+ %call = call fast float @_Z3tanf(float %tmp)
store float %call, float addrspace(1)* %a, align 4
ret void
}
declare float @_Z3tanf(float)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
-; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp)
-; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp)
+; GCN-NATIVE: call float @_Z10native_sinf(float %tmp)
+; GCN-NATIVE: call float @_Z10native_cosf(float %tmp)
define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) {
entry:
%tmp = load float, float addrspace(1)* %a, align 4
%arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
%tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float*
- %call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
+ %call = call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
store float %call, float addrspace(1)* %a, align 4
ret void
}
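; (Not part of the original test.) Per the GCN-NATIVE checks above, the single
; sincos call is expected to split into native_sin(%tmp), whose result is
; stored to %a, and native_cos(%tmp), whose result goes out through the generic
; pointer %tmp1 (i.e. the element at %arrayidx1).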
entry:
%tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
%tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
- %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
- %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
- %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
- tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
+ %tmp2 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
+ %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
+ %tmp4 = call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
+ call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
ret void
}
entry:
%tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
%tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
- %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
- %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
- %tmp4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
- tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
+ %tmp2 = call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
+ %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
+ %tmp4 = call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
+ call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
ret void
}
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
entry:
%tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
- %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
+ %tmp1 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
%tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
%tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8*
- %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
+ %tmp4 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
%tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)*
%tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8*
- %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0
+ %tmp7 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0
%tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)*
%tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8*
- %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0
+ %tmp10 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0
%tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)*
%tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8*
- %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0
+ %tmp13 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0
%tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)*
%tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8*
- %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0
+ %tmp16 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0
%tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)*
%tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8*
- %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0
+ %tmp19 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0
%tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)*
%tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8*
- %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0
+ %tmp22 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0
%tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)*
%tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8*
- %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0
+ %tmp25 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0
ret void
}
; IR passes run at -O1 and higher.
; OPT-O1-DAG: Skipping pass 'Aggressive Dead Code Elimination'
; OPT-O1-DAG: Skipping pass 'Combine redundant instructions'
-; OPT-O1-DAG: Skipping pass 'Dead Store Elimination'
; OPT-O1-DAG: Skipping pass 'Early CSE'
-; OPT-O1-DAG: Skipping pass 'Jump Threading'
-; OPT-O1-DAG: Skipping pass 'MemCpy Optimization'
; OPT-O1-DAG: Skipping pass 'Reassociate expressions'
; OPT-O1-DAG: Skipping pass 'Simplify the CFG'
; OPT-O1-DAG: Skipping pass 'Sparse Conditional Constant Propagation'
-; OPT-O1-DAG: Skipping pass 'SROA'
-; OPT-O1-DAG: Skipping pass 'Tail Call Elimination'
-; OPT-O1-DAG: Skipping pass 'Value Propagation'
; Additional IR passes run at -O2 and higher.
; OPT-O2O3-DAG: Skipping pass 'Global Value Numbering'
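; (Not part of the original test.) A minimal sketch of the kind of input these
; "Skipping pass" checks assume: a function carrying the 'optnone' attribute,
; for which an assertions-enabled opt reports each skipped IR pass in its debug
; output. The name and body below are illustrative only; what matters is the
; noinline+optnone attribute pair.
define i32 @optnone_sketch(i32 %x) noinline optnone {
entry:
  %add = add nsw i32 %x, 1
  ret i32 %add
}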
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O1
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='default<O2>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
+; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \
+; RUN: --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='default<O3>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3
+; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
+; RUN: --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='default<Os>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Os
+; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Os \
+; RUN: --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='default<Oz>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Oz
+; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Oz \
+; RUN: --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='lto-pre-link<O2>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \
-; RUN: --check-prefix=CHECK-O2-LTO
+; RUN: --check-prefix=CHECK-O2-LTO --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes-ep-peephole='no-op-function' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
-; RUN: --check-prefix=CHECK-EP-PEEPHOLE
+; RUN: --check-prefix=CHECK-EP-PEEPHOLE --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes-ep-late-loop-optimizations='no-op-loop' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
-; RUN: --check-prefix=CHECK-EP-LOOP-LATE
+; RUN: --check-prefix=CHECK-EP-LOOP-LATE --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes-ep-loop-optimizer-end='no-op-loop' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
-; RUN: --check-prefix=CHECK-EP-LOOP-END
+; RUN: --check-prefix=CHECK-EP-LOOP-END --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes-ep-scalar-optimizer-late='no-op-function' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
-; RUN: --check-prefix=CHECK-EP-SCALAR-LATE
+; RUN: --check-prefix=CHECK-EP-SCALAR-LATE --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes-ep-cgscc-optimizer-late='no-op-cgscc' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
-; RUN: --check-prefix=CHECK-EP-CGSCC-LATE
+; RUN: --check-prefix=CHECK-EP-CGSCC-LATE --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes-ep-vectorizer-start='no-op-function' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
-; RUN: --check-prefix=CHECK-EP-VECTORIZER-START
+; RUN: --check-prefix=CHECK-EP-VECTORIZER-START --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes-ep-pipeline-start='no-op-module' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
-; RUN: --check-prefix=CHECK-EP-PIPELINE-START
+; RUN: --check-prefix=CHECK-EP-PIPELINE-START --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes-ep-pipeline-start='no-op-module' \
; RUN: -passes='lto-pre-link<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
-; RUN: --check-prefix=CHECK-EP-PIPELINE-START
+; RUN: --check-prefix=CHECK-EP-PIPELINE-START --check-prefix=CHECK-O23SZ
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes-ep-optimizer-last='no-op-function' \
; RUN: -passes='default<O3>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \
-; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST
+; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST --check-prefix=CHECK-O23SZ
; CHECK-O: Running analysis: PassInstrumentationAnalysis
; CHECK-O-NEXT: Starting llvm::Module pass manager run.
; CHECK-O-NEXT: Running pass: SROA
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
-; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass
-; CHECK-O-NEXT: Running pass: JumpThreadingPass
-; CHECK-O-NEXT: Running analysis: LazyValueAnalysis
-; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
+; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
+; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
+; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
-; CHECK-O-NEXT: Running pass: TailCallElimPass
+; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: ReassociatePass
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass
; CHECK-O-NEXT: Finished Loop pass manager run.
; CHECK-O-NEXT: Running pass: SROA on foo
-; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
-; CHECK-Os-NEXT: Running pass: GVN
-; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis
-; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass
-; CHECK-Oz-NEXT: Running pass: GVN
-; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis
-; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass
-; CHECK-O2-NEXT: Running pass: GVN
-; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
-; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass
-; CHECK-O3-NEXT: Running pass: GVN
-; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis
+; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
+; CHECK-O23SZ-NEXT: Running pass: GVN
+; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
+; CHECK-O23SZ-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O-NEXT: Running pass: MemCpyOptPass
; CHECK-O1-NEXT: Running analysis: MemoryDependenceAnalysis
; CHECK-O1-NEXT: Running analysis: PhiValuesAnalysis
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
-; CHECK-O-NEXT: Running pass: JumpThreadingPass
-; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass
-; CHECK-O-NEXT: Running pass: DSEPass
-; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}>
-; CHECK-O-NEXT: Starting llvm::Function pass manager run.
-; CHECK-O-NEXT: Running pass: LoopSimplifyPass
-; CHECK-O-NEXT: Running pass: LCSSAPass
-; CHECK-O-NEXT: Finished llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
+; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT: Running pass: DSEPass
+; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}>
+; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
+; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
+; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
+; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run.
; CHECK-EP-SCALAR-LATE-NEXT: Running pass: NoOpFunctionPass
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O1
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='thinlto-pre-link<O2>,name-anon-globals' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O2
+; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O2
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='thinlto-pre-link<O3>,name-anon-globals' -S -passes-ep-pipeline-start='no-op-module' %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O3,CHECK-EP-PIPELINE-START
+; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O3,CHECK-EP-PIPELINE-START
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='thinlto-pre-link<Os>,name-anon-globals' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Os
+; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Os
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='thinlto-pre-link<Oz>,name-anon-globals' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Oz
+; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Oz
; RUN: opt -disable-verify -debug-pass-manager -new-pm-debug-info-for-profiling \
; RUN: -passes='thinlto-pre-link<O2>,name-anon-globals' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O2
+; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O2
;
; Postlink pipelines:
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-POSTLINK-O,CHECK-POSTLINK-O1
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-POSTLINK-O,CHECK-POSTLINK-O2
+; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O2
; RUN: opt -disable-verify -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-POSTLINK-O,CHECK-POSTLINK-O3
+; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O3
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='thinlto<Os>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-POSTLINK-O,CHECK-POSTLINK-Os
+; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-Os
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='thinlto<Oz>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-POSTLINK-O,CHECK-POSTLINK-Oz
+; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-Oz
; RUN: opt -disable-verify -debug-pass-manager -new-pm-debug-info-for-profiling \
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-POSTLINK-O,CHECK-POSTLINK-O2
+; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O2
;
; CHECK-O: Running analysis: PassInstrumentationAnalysis
; CHECK-O-NEXT: Starting llvm::Module pass manager run.
; CHECK-O-NEXT: Running pass: SROA
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
-; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass
-; CHECK-O-NEXT: Running pass: JumpThreadingPass
-; CHECK-O-NEXT: Running analysis: LazyValueAnalysis
-; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
+; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
+; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
+; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
-; CHECK-O-NEXT: Running pass: TailCallElimPass
+; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: ReassociatePass
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
; CHECK-O-NEXT: Running pass: BDCEPass
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
; CHECK-O-NEXT: Running pass: InstCombinePass
-; CHECK-O-NEXT: Running pass: JumpThreadingPass
-; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass
-; CHECK-O-NEXT: Running pass: DSEPass
-; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}>
-; CHECK-O-NEXT: Starting llvm::Function pass manager run
-; CHECK-O-NEXT: Running pass: LoopSimplifyPass
-; CHECK-O-NEXT: Running pass: LCSSAPass
-; CHECK-O-NEXT: Finished llvm::Function pass manager run
+; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
+; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT: Running pass: DSEPass
+; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}>
+; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run
+; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
+; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
+; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run
; CHECK-O-NEXT: Running pass: ADCEPass
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
-; RUN: opt < %s -O1 -S | FileCheck %s
+; RUN: opt < %s -O2 -S | FileCheck %s
; performCallSlotOptzn in MemCpyOpt should not exchange the calls to
; @llvm.lifetime.start and @llvm.memcpy.
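; (Not the original test.) A minimal sketch of the call-slot shape that comment
; describes: a hypothetical helper @fill writes a lifetime-delimited temporary,
; which is then memcpy'd to the real destination. The names and the 8-byte size
; are illustrative only; the point is that whatever rewrite MemCpyOpt performs,
; it must not end up moving the memcpy's effect ahead of @llvm.lifetime.start.
define void @lifetime_sketch(i8* %dst) {
entry:
  %tmp = alloca [8 x i8], align 8
  %p = getelementptr inbounds [8 x i8], [8 x i8]* %tmp, i64 0, i64 0
  call void @llvm.lifetime.start.p0i8(i64 8, i8* %p)
  call void @fill(i8* %p) ; hypothetical initializer of the temporary
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 8, i1 false)
  call void @llvm.lifetime.end.p0i8(i64 8, i8* %p)
  ret void
}
declare void @fill(i8*)
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)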
define i1 @PR33605(i32 %a, i32 %b, i32* %c) {
; ALL-LABEL: @PR33605(
-; ALL-NEXT: for.body:
+; ALL-NEXT: entry:
; ALL-NEXT: [[OR:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1
; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; ALL-NEXT: br i1 [[CMP]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; ALL: if.then:
; ALL-NEXT: store i32 [[OR]], i32* [[ARRAYIDX]], align 4
-; ALL-NEXT: tail call void @foo()
+; ALL-NEXT: call void @foo()
; ALL-NEXT: br label [[IF_END]]
; ALL: if.end:
-; ALL-NEXT: [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[FOR_BODY:%.*]] ]
+; ALL-NEXT: [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[ENTRY:%.*]] ]
; ALL-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4
; ALL-NEXT: [[CMP_1:%.*]] = icmp eq i32 [[OR]], [[TMP1]]
; ALL-NEXT: br i1 [[CMP_1]], label [[IF_END_1:%.*]], label [[IF_THEN_1:%.*]]
; ALL: if.then.1:
; ALL-NEXT: store i32 [[OR]], i32* [[C]], align 4
-; ALL-NEXT: tail call void @foo()
+; ALL-NEXT: call void @foo()
; ALL-NEXT: br label [[IF_END_1]]
; ALL: if.end.1:
; ALL-NEXT: [[CHANGED_1_OFF0_1:%.*]] = phi i1 [ true, [[IF_THEN_1]] ], [ [[CHANGED_1_OFF0]], [[IF_END]] ]
define i32 @two_shifts_by_sext_with_extra_use(i32 %val, i8 signext %len) {
; CHECK-LABEL: @two_shifts_by_sext_with_extra_use(
; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32
-; CHECK-NEXT: tail call void @use_int32(i32 [[CONV]])
+; CHECK-NEXT: call void @use_int32(i32 [[CONV]])
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]]
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]]
; CHECK-NEXT: ret i32 [[SHR]]
define i32 @two_shifts_by_same_sext_with_extra_use(i32 %val, i8 signext %len) {
; CHECK-LABEL: @two_shifts_by_same_sext_with_extra_use(
; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32
-; CHECK-NEXT: tail call void @use_int32(i32 [[CONV]])
+; CHECK-NEXT: call void @use_int32(i32 [[CONV]])
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]]
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]]
; CHECK-NEXT: ret i32 [[SHR]]