target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
-; CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @memprof.module_ctor to i8*)]
-; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @memprof.module_ctor, i8* null }]
+; CHECK: @llvm.used = appending global [1 x ptr] [ptr @memprof.module_ctor]
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @memprof.module_ctor, ptr null }]
-define i32 @test_load(i32* %a) {
+define i32 @test_load(ptr %a) {
entry:
- %tmp1 = load i32, i32* %a, align 4
+ %tmp1 = load i32, ptr %a, align 4
ret i32 %tmp1
}
; CHECK-LABEL: @test_load
-; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, i64* @__memprof_shadow_memory_dynamic_address
-; CHECK-NEXT: %[[LOAD_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
+; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, ptr @__memprof_shadow_memory_dynamic_address
+; CHECK-NEXT: %[[LOAD_ADDR:[^ ]*]] = ptrtoint ptr %a to i64
; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[LOAD_ADDR]], -64
; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3
; CHECK-S5-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 5
; CHECK-NEXT: add i64 %[[SHIFTED_ADDR]], %[[SHADOW_OFFSET]]
; CHECK-NEXT: %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
-; CHECK-NEXT: %[[LOAD_SHADOW:[^ ]*]] = load i64, i64* %[[LOAD_SHADOW_PTR]]
+; CHECK-NEXT: %[[LOAD_SHADOW:[^ ]*]] = load i64, ptr %[[LOAD_SHADOW_PTR]]
; CHECK-NEXT: %[[NEW_SHADOW:[^ ]*]] = add i64 %[[LOAD_SHADOW]], 1
-; CHECK-NEXT: store i64 %[[NEW_SHADOW]], i64* %[[LOAD_SHADOW_PTR]]
+; CHECK-NEXT: store i64 %[[NEW_SHADOW]], ptr %[[LOAD_SHADOW_PTR]]
; The actual load.
-; CHECK-NEXT: %tmp1 = load i32, i32* %a
+; CHECK-NEXT: %tmp1 = load i32, ptr %a
; CHECK-NEXT: ret i32 %tmp1
-define void @test_store(i32* %a) {
+define void @test_store(ptr %a) {
entry:
- store i32 42, i32* %a, align 4
+ store i32 42, ptr %a, align 4
ret void
}
; CHECK-LABEL: @test_store
-; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, i64* @__memprof_shadow_memory_dynamic_address
-; CHECK-NEXT: %[[STORE_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
+; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, ptr @__memprof_shadow_memory_dynamic_address
+; CHECK-NEXT: %[[STORE_ADDR:[^ ]*]] = ptrtoint ptr %a to i64
; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[STORE_ADDR]], -64
; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3
; CHECK-S5-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 5
; CHECK-NEXT: add i64 %[[SHIFTED_ADDR]], %[[SHADOW_OFFSET]]
; CHECK-NEXT: %[[STORE_SHADOW_PTR:[^ ]*]] = inttoptr
-; CHECK-NEXT: %[[STORE_SHADOW:[^ ]*]] = load i64, i64* %[[STORE_SHADOW_PTR]]
+; CHECK-NEXT: %[[STORE_SHADOW:[^ ]*]] = load i64, ptr %[[STORE_SHADOW_PTR]]
; CHECK-NEXT: %[[NEW_SHADOW:[^ ]*]] = add i64 %[[STORE_SHADOW]], 1
-; CHECK-NEXT: store i64 %[[NEW_SHADOW]], i64* %[[STORE_SHADOW_PTR]]
+; CHECK-NEXT: store i64 %[[NEW_SHADOW]], ptr %[[STORE_SHADOW_PTR]]
; The actual store.
-; CHECK-NEXT: store i32 42, i32* %a
+; CHECK-NEXT: store i32 42, ptr %a
; CHECK-NEXT: ret void
-define void @FP80Test(x86_fp80* nocapture %a) nounwind uwtable {
+define void @FP80Test(ptr nocapture %a) nounwind uwtable {
entry:
- store x86_fp80 0xK3FFF8000000000000000, x86_fp80* %a, align 16
+ store x86_fp80 0xK3FFF8000000000000000, ptr %a, align 16
ret void
}
; CHECK-LABEL: @FP80Test
; CHECK-NEXT: store i64 %[[NEW_ST_SHADOW]]
; CHECK-NOT: store i64
; The actual store.
-; CHECK: store x86_fp80 0xK3FFF8000000000000000, x86_fp80* %a
+; CHECK: store x86_fp80 0xK3FFF8000000000000000, ptr %a
; CHECK: ret void
-define void @i40test(i40* %a, i40* %b) nounwind uwtable {
+define void @i40test(ptr %a, ptr %b) nounwind uwtable {
entry:
- %t = load i40, i40* %a
- store i40 %t, i40* %b, align 8
+ %t = load i40, ptr %a
+ store i40 %t, ptr %b, align 8
ret void
}
; CHECK-LABEL: @i40test
; CHECK-NEXT: store i64 %[[NEW_LD_SHADOW]]
; CHECK-NOT: store i64
; The actual load.
-; CHECK: %t = load i40, i40* %a
+; CHECK: %t = load i40, ptr %a
; Exactly one shadow update for store access.
; CHECK-NOT: store i64
; CHECK: %[[NEW_ST_SHADOW:[^ ]*]] = add i64 %{{.*}}, 1
; CHECK-NEXT: store i64 %[[NEW_ST_SHADOW]]
; CHECK-NOT: store i64
; The actual store.
-; CHECK: store i40 %t, i40* %b
+; CHECK: store i40 %t, ptr %b
; CHECK: ret void
-define void @i64test_align1(i64* %b) nounwind uwtable {
+define void @i64test_align1(ptr %b) nounwind uwtable {
entry:
- store i64 0, i64* %b, align 1
+ store i64 0, ptr %b, align 1
ret void
}
; CHECK-LABEL: @i64test
; CHECK-NEXT: store i64 %[[NEW_ST_SHADOW]]
; CHECK-NOT: store i64
; The actual store.
-; CHECK: store i64 0, i64* %b
+; CHECK: store i64 0, ptr %b
; CHECK: ret void
-define void @i80test(i80* %a, i80* %b) nounwind uwtable {
+define void @i80test(ptr %a, ptr %b) nounwind uwtable {
entry:
- %t = load i80, i80* %a
- store i80 %t, i80* %b, align 8
+ %t = load i80, ptr %a
+ store i80 %t, ptr %b, align 8
ret void
}
; CHECK-LABEL: i80test
; CHECK-NEXT: store i64 %[[NEW_LD_SHADOW]]
; CHECK-NOT: store i64
; The actual load.
-; CHECK: %t = load i80, i80* %a
+; CHECK: %t = load i80, ptr %a
; Exactly one shadow update for store access.
; CHECK-NOT: store i64
; CHECK: %[[NEW_ST_SHADOW:[^ ]*]] = add i64 %{{.*}}, 1
; CHECK-NEXT: store i64 %[[NEW_ST_SHADOW]]
; CHECK-NOT: store i64
; The actual store.
-; CHECK: store i80 %t, i80* %b
+; CHECK: store i80 %t, ptr %b
; CHECK: ret void
; memprof should not instrument functions with available_externally linkage.
-define available_externally i32 @f_available_externally(i32* %a) {
+define available_externally i32 @f_available_externally(ptr %a) {
entry:
- %tmp1 = load i32, i32* %a
+ %tmp1 = load i32, ptr %a
ret i32 %tmp1
}
; CHECK-LABEL: @f_available_externally
; CHECK-NOT: __memprof_shadow_memory_dynamic_address
; CHECK: ret i32
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) nounwind
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) nounwind
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) nounwind
-define void @memintr_test(i8* %a, i8* %b) nounwind uwtable {
+define void @memintr_test(ptr %a, ptr %b) nounwind uwtable {
entry:
- tail call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 100, i1 false)
- tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %a, i8* %b, i64 100, i1 false)
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 100, i1 false)
+ tail call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 100, i1 false)
+ tail call void @llvm.memmove.p0.p0.i64(ptr %a, ptr %b, i64 100, i1 false)
+ tail call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr %b, i64 100, i1 false)
ret void
}
; CHECK: __memprof_memcpy
; CHECK: ret void
-declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture writeonly, i8, i64, i32) nounwind
-declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32) nounwind
-declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0.i64(ptr nocapture writeonly, i8, i64, i32) nounwind
+declare void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) nounwind
+declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) nounwind
-define void @memintr_element_atomic_test(i8* %a, i8* %b) nounwind uwtable {
+define void @memintr_element_atomic_test(ptr %a, ptr %b) nounwind uwtable {
; This is a canary test to make sure that these don't get lowered into calls that don't
; have the element-atomic property. Eventually, memprof will have to be enhanced to lower
; these properly.
; CHECK-LABEL: memintr_element_atomic_test
- ; CHECK: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %a, i8 0, i64 100, i32 1)
- ; CHECK: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 100, i32 1)
- ; CHECK: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 100, i32 1)
+ ; CHECK: tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 1 %a, i8 0, i64 100, i32 1)
+ ; CHECK: tail call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 %a, ptr align 1 %b, i64 100, i32 1)
+ ; CHECK: tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 %a, ptr align 1 %b, i64 100, i32 1)
; CHECK: ret void
- tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %a, i8 0, i64 100, i32 1)
- tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 100, i32 1)
- tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 100, i32 1)
+ tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 1 %a, i8 0, i64 100, i32 1)
+ tail call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 %a, ptr align 1 %b, i64 100, i32 1)
+ tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 %a, ptr align 1 %b, i64 100, i32 1)
ret void
}
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-@v4f32 = global <4 x float>* zeroinitializer, align 8
-@v8i32 = global <8 x i32>* zeroinitializer, align 8
-@v4i64 = global <4 x i32*>* zeroinitializer, align 8
+@v4f32 = global ptr zeroinitializer, align 8
+@v8i32 = global ptr zeroinitializer, align 8
+@v4i64 = global ptr zeroinitializer, align 8
;;;;;;;;;;;;;;;; STORE
-declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) argmemonly nounwind
-declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>) argmemonly nounwind
-declare void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*>, <4 x i32*>*, i32, <4 x i1>) argmemonly nounwind
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) argmemonly nounwind
+declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>) argmemonly nounwind
+declare void @llvm.masked.store.v4p0.p0(<4 x ptr>, ptr, i32, <4 x i1>) argmemonly nounwind
define void @store.v4f32.1110(<4 x float> %arg) {
; ALL-LABEL: @store.v4f32.1110
- %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+ %p = load ptr, ptr @v4f32, align 8
; NOSTORE-NOT: call void @__memprof_store
-; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
-; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 0
+; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP0]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP0]])
-; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1
-; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64
+; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 1
+; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP1]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP1]])
-; STORE: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2
-; STORE: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64
+; STORE: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 2
+; STORE: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP2]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP2]])
-; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
+; STORE: tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
ret void
}
define void @store.v8i32.10010110(<8 x i32> %arg) {
; ALL-LABEL: @store.v8i32.10010110
- %p = load <8 x i32>*, <8 x i32>** @v8i32, align 8
+ %p = load ptr, ptr @v8i32, align 8
; NOSTORE-NOT: call void @__memprof_store
-; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 0
-; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP0]] to i64
+; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, ptr %p, i64 0, i64 0
+; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP0]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP0]])
-; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 3
-; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP3]] to i64
+; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, ptr %p, i64 0, i64 3
+; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP3]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP3]])
-; STORE: [[GEP5:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 5
-; STORE: [[PGEP5:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP5]] to i64
+; STORE: [[GEP5:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, ptr %p, i64 0, i64 5
+; STORE: [[PGEP5:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP5]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP5]])
-; STORE: [[GEP6:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 6
-; STORE: [[PGEP6:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP6]] to i64
+; STORE: [[GEP6:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, ptr %p, i64 0, i64 6
+; STORE: [[PGEP6:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP6]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP6]])
-; STORE: tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %arg, <8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
- tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %arg, <8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
+; STORE: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> %arg, ptr %p, i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
+ tail call void @llvm.masked.store.v8i32.p0(<8 x i32> %arg, ptr %p, i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
ret void
}
-define void @store.v4i64.0001(<4 x i32*> %arg) {
+define void @store.v4i64.0001(<4 x ptr> %arg) {
; ALL-LABEL: @store.v4i64.0001
- %p = load <4 x i32*>*, <4 x i32*>** @v4i64, align 8
+ %p = load ptr, ptr @v4i64, align 8
; NOSTORE-NOT: call void @__memprof_store
-; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x i32*>, <4 x i32*>* %p, i64 0, i64 3
-; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32** [[GEP3]] to i64
+; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x ptr>, ptr %p, i64 0, i64 3
+; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP3]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP3]])
-; STORE: tail call void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*> %arg, <4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
- tail call void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*> %arg, <4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
+; STORE: tail call void @llvm.masked.store.v4p0.p0(<4 x ptr> %arg, ptr %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
+ tail call void @llvm.masked.store.v4p0.p0(<4 x ptr> %arg, ptr %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
ret void
}
define void @store.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) {
; ALL-LABEL: @store.v4f32.variable
- %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+ %p = load ptr, ptr @v4f32, align 8
; STORE: [[MASK0:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 0
; STORE: br i1 [[MASK0]], label %[[THEN0:[0-9A-Za-z]+]], label %[[AFTER0:[0-9A-Za-z]+]]
; STORE: [[THEN0]]:
-; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
-; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 0
+; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP0]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP0]])
; STORE: br label %[[AFTER0]]
; STORE: [[AFTER0]]:
; STORE: [[MASK1:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 1
; STORE: br i1 [[MASK1]], label %[[THEN1:[0-9A-Za-z]+]], label %[[AFTER1:[0-9A-Za-z]+]]
; STORE: [[THEN1]]:
-; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1
-; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64
+; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 1
+; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP1]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP1]])
; STORE: br label %[[AFTER1]]
; STORE: [[AFTER1]]:
; STORE: [[MASK2:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 2
; STORE: br i1 [[MASK2]], label %[[THEN2:[0-9A-Za-z]+]], label %[[AFTER2:[0-9A-Za-z]+]]
; STORE: [[THEN2]]:
-; STORE: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2
-; STORE: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64
+; STORE: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 2
+; STORE: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP2]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP2]])
; STORE: br label %[[AFTER2]]
; STORE: [[AFTER2]]:
; STORE: [[MASK3:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 3
; STORE: br i1 [[MASK3]], label %[[THEN3:[0-9A-Za-z]+]], label %[[AFTER3:[0-9A-Za-z]+]]
; STORE: [[THEN3]]:
-; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3
-; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64
+; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 3
+; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP3]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP3]])
; STORE: br label %[[AFTER3]]
; STORE: [[AFTER3]]:
-; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> %mask)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> %mask)
+; STORE: tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> %mask)
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> %mask)
ret void
}
;; Store using two masked.stores, which should instrument them both.
define void @store.v4f32.1010.split(<4 x float> %arg) {
; BOTH-LABEL: @store.v4f32.1010.split
- %p = load <4 x float>*, <4 x float>** @v4f32, align 8
-; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
-; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+ %p = load ptr, ptr @v4f32, align 8
+; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 0
+; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP0]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP0]])
-; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
-; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2
-; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64
+; STORE: tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
+; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 2
+; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP1]] to i64
; STORE: call void @__memprof_store(i64 [[PGEP1]])
-; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
- tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
+; STORE: tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
+ tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
ret void
}
;;;;;;;;;;;;;;;; LOAD
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) argmemonly nounwind
-declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>) argmemonly nounwind
-declare <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>*, i32, <4 x i1>, <4 x i32*>) argmemonly nounwind
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>) argmemonly nounwind
+declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>) argmemonly nounwind
+declare <4 x ptr> @llvm.masked.load.v4p0.p0(ptr, i32, <4 x i1>, <4 x ptr>) argmemonly nounwind
define <8 x i32> @load.v8i32.11100001(<8 x i32> %arg) {
; ALL-LABEL: @load.v8i32.11100001
- %p = load <8 x i32>*, <8 x i32>** @v8i32, align 8
+ %p = load ptr, ptr @v8i32, align 8
; NOLOAD-NOT: call void @__memprof_load
-; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 0
-; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP0]] to i64
+; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, ptr %p, i64 0, i64 0
+; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP0]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP0]])
-; LOAD: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 1
-; LOAD: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP1]] to i64
+; LOAD: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, ptr %p, i64 0, i64 1
+; LOAD: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP1]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP1]])
-; LOAD: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 2
-; LOAD: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP2]] to i64
+; LOAD: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, ptr %p, i64 0, i64 2
+; LOAD: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP2]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP2]])
-; LOAD: [[GEP7:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 7
-; LOAD: [[PGEP7:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP7]] to i64
+; LOAD: [[GEP7:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, ptr %p, i64 0, i64 7
+; LOAD: [[PGEP7:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP7]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP7]])
-; LOAD: tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> %arg)
- %res = tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> %arg)
+; LOAD: tail call <8 x i32> @llvm.masked.load.v8i32.p0(ptr %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> %arg)
+ %res = tail call <8 x i32> @llvm.masked.load.v8i32.p0(ptr %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> %arg)
ret <8 x i32> %res
}
define <4 x float> @load.v4f32.1001(<4 x float> %arg) {
; ALL-LABEL: @load.v4f32.1001
- %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+ %p = load ptr, ptr @v4f32, align 8
; NOLOAD-NOT: call void @__memprof_load
-; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
-; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 0
+; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP0]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP0]])
-; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3
-; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64
+; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 3
+; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP3]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP3]])
-; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %arg)
- %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %arg)
+; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %arg)
+ %res = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %arg)
ret <4 x float> %res
}
-define <4 x i32*> @load.v4i64.0001(<4 x i32*> %arg) {
+define <4 x ptr> @load.v4i64.0001(<4 x ptr> %arg) {
; ALL-LABEL: @load.v4i64.0001
- %p = load <4 x i32*>*, <4 x i32*>** @v4i64, align 8
+ %p = load ptr, ptr @v4i64, align 8
; NOLOAD-NOT: call void @__memprof_load
-; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x i32*>, <4 x i32*>* %p, i64 0, i64 3
-; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32** [[GEP3]] to i64
+; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x ptr>, ptr %p, i64 0, i64 3
+; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP3]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP3]])
-; LOAD: tail call <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32*> %arg)
- %res = tail call <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32*> %arg)
- ret <4 x i32*> %res
+; LOAD: tail call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x ptr> %arg)
+ %res = tail call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x ptr> %arg)
+ ret <4 x ptr> %res
}
define <4 x float> @load.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) {
; ALL-LABEL: @load.v4f32.variable
- %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+ %p = load ptr, ptr @v4f32, align 8
; LOAD: [[MASK0:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 0
; LOAD: br i1 [[MASK0]], label %[[THEN0:[0-9A-Za-z]+]], label %[[AFTER0:[0-9A-Za-z]+]]
; LOAD: [[THEN0]]:
-; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
-; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 0
+; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP0]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP0]])
; LOAD: br label %[[AFTER0]]
; LOAD: [[AFTER0]]:
; LOAD: [[MASK1:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 1
; LOAD: br i1 [[MASK1]], label %[[THEN1:[0-9A-Za-z]+]], label %[[AFTER1:[0-9A-Za-z]+]]
; LOAD: [[THEN1]]:
-; LOAD: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1
-; LOAD: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64
+; LOAD: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 1
+; LOAD: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP1]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP1]])
; LOAD: br label %[[AFTER1]]
; LOAD: [[AFTER1]]:
; LOAD: [[MASK2:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 2
; LOAD: br i1 [[MASK2]], label %[[THEN2:[0-9A-Za-z]+]], label %[[AFTER2:[0-9A-Za-z]+]]
; LOAD: [[THEN2]]:
-; LOAD: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2
-; LOAD: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64
+; LOAD: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 2
+; LOAD: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP2]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP2]])
; LOAD: br label %[[AFTER2]]
; LOAD: [[AFTER2]]:
; LOAD: [[MASK3:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 3
; LOAD: br i1 [[MASK3]], label %[[THEN3:[0-9A-Za-z]+]], label %[[AFTER3:[0-9A-Za-z]+]]
; LOAD: [[THEN3]]:
-; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3
-; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64
+; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 3
+; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP3]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP3]])
; LOAD: br label %[[AFTER3]]
; LOAD: [[AFTER3]]:
-; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> %mask, <4 x float> %arg)
- %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> %mask, <4 x float> %arg)
+; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> %mask, <4 x float> %arg)
+ %res = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> %mask, <4 x float> %arg)
ret <4 x float> %res
}
;; Load using two masked.loads, which should instrument them both.
define <4 x float> @load.v4f32.1001.split(<4 x float> %arg) {
; BOTH-LABEL: @load.v4f32.1001
- %p = load <4 x float>*, <4 x float>** @v4f32, align 8
-; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
-; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+ %p = load ptr, ptr @v4f32, align 8
+; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 0
+; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP0]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP0]])
-; LOAD: %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %arg)
- %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %arg)
-; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3
-; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64
+; LOAD: %res = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %arg)
+ %res = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %arg)
+; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, ptr %p, i64 0, i64 3
+; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint ptr [[GEP3]] to i64
; LOAD: call void @__memprof_load(i64 [[PGEP3]])
-; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %res)
- %res2 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %res)
+; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %res)
+ %res2 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %res)
ret <4 x float> %res2
}