; RUN: llc -march=nvptx < %s > %t
; RUN: llc -march=nvptx64 < %s > %t
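; Check that a load and store of every integer width from i1 through i64
; compiles without error for both the nvptx and nvptx64 targets.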
-@i1_l = external global i1 ; <i1*> [#uses=1]
-@i1_s = external global i1 ; <i1*> [#uses=1]
-@i2_l = external global i2 ; <i2*> [#uses=1]
-@i2_s = external global i2 ; <i2*> [#uses=1]
-@i3_l = external global i3 ; <i3*> [#uses=1]
-@i3_s = external global i3 ; <i3*> [#uses=1]
-@i4_l = external global i4 ; <i4*> [#uses=1]
-@i4_s = external global i4 ; <i4*> [#uses=1]
-@i5_l = external global i5 ; <i5*> [#uses=1]
-@i5_s = external global i5 ; <i5*> [#uses=1]
-@i6_l = external global i6 ; <i6*> [#uses=1]
-@i6_s = external global i6 ; <i6*> [#uses=1]
-@i7_l = external global i7 ; <i7*> [#uses=1]
-@i7_s = external global i7 ; <i7*> [#uses=1]
-@i8_l = external global i8 ; <i8*> [#uses=1]
-@i8_s = external global i8 ; <i8*> [#uses=1]
-@i9_l = external global i9 ; <i9*> [#uses=1]
-@i9_s = external global i9 ; <i9*> [#uses=1]
-@i10_l = external global i10 ; <i10*> [#uses=1]
-@i10_s = external global i10 ; <i10*> [#uses=1]
-@i11_l = external global i11 ; <i11*> [#uses=1]
-@i11_s = external global i11 ; <i11*> [#uses=1]
-@i12_l = external global i12 ; <i12*> [#uses=1]
-@i12_s = external global i12 ; <i12*> [#uses=1]
-@i13_l = external global i13 ; <i13*> [#uses=1]
-@i13_s = external global i13 ; <i13*> [#uses=1]
-@i14_l = external global i14 ; <i14*> [#uses=1]
-@i14_s = external global i14 ; <i14*> [#uses=1]
-@i15_l = external global i15 ; <i15*> [#uses=1]
-@i15_s = external global i15 ; <i15*> [#uses=1]
-@i16_l = external global i16 ; <i16*> [#uses=1]
-@i16_s = external global i16 ; <i16*> [#uses=1]
-@i17_l = external global i17 ; <i17*> [#uses=1]
-@i17_s = external global i17 ; <i17*> [#uses=1]
-@i18_l = external global i18 ; <i18*> [#uses=1]
-@i18_s = external global i18 ; <i18*> [#uses=1]
-@i19_l = external global i19 ; <i19*> [#uses=1]
-@i19_s = external global i19 ; <i19*> [#uses=1]
-@i20_l = external global i20 ; <i20*> [#uses=1]
-@i20_s = external global i20 ; <i20*> [#uses=1]
-@i21_l = external global i21 ; <i21*> [#uses=1]
-@i21_s = external global i21 ; <i21*> [#uses=1]
-@i22_l = external global i22 ; <i22*> [#uses=1]
-@i22_s = external global i22 ; <i22*> [#uses=1]
-@i23_l = external global i23 ; <i23*> [#uses=1]
-@i23_s = external global i23 ; <i23*> [#uses=1]
-@i24_l = external global i24 ; <i24*> [#uses=1]
-@i24_s = external global i24 ; <i24*> [#uses=1]
-@i25_l = external global i25 ; <i25*> [#uses=1]
-@i25_s = external global i25 ; <i25*> [#uses=1]
-@i26_l = external global i26 ; <i26*> [#uses=1]
-@i26_s = external global i26 ; <i26*> [#uses=1]
-@i27_l = external global i27 ; <i27*> [#uses=1]
-@i27_s = external global i27 ; <i27*> [#uses=1]
-@i28_l = external global i28 ; <i28*> [#uses=1]
-@i28_s = external global i28 ; <i28*> [#uses=1]
-@i29_l = external global i29 ; <i29*> [#uses=1]
-@i29_s = external global i29 ; <i29*> [#uses=1]
-@i30_l = external global i30 ; <i30*> [#uses=1]
-@i30_s = external global i30 ; <i30*> [#uses=1]
-@i31_l = external global i31 ; <i31*> [#uses=1]
-@i31_s = external global i31 ; <i31*> [#uses=1]
-@i32_l = external global i32 ; <i32*> [#uses=1]
-@i32_s = external global i32 ; <i32*> [#uses=1]
-@i33_l = external global i33 ; <i33*> [#uses=1]
-@i33_s = external global i33 ; <i33*> [#uses=1]
-@i34_l = external global i34 ; <i34*> [#uses=1]
-@i34_s = external global i34 ; <i34*> [#uses=1]
-@i35_l = external global i35 ; <i35*> [#uses=1]
-@i35_s = external global i35 ; <i35*> [#uses=1]
-@i36_l = external global i36 ; <i36*> [#uses=1]
-@i36_s = external global i36 ; <i36*> [#uses=1]
-@i37_l = external global i37 ; <i37*> [#uses=1]
-@i37_s = external global i37 ; <i37*> [#uses=1]
-@i38_l = external global i38 ; <i38*> [#uses=1]
-@i38_s = external global i38 ; <i38*> [#uses=1]
-@i39_l = external global i39 ; <i39*> [#uses=1]
-@i39_s = external global i39 ; <i39*> [#uses=1]
-@i40_l = external global i40 ; <i40*> [#uses=1]
-@i40_s = external global i40 ; <i40*> [#uses=1]
-@i41_l = external global i41 ; <i41*> [#uses=1]
-@i41_s = external global i41 ; <i41*> [#uses=1]
-@i42_l = external global i42 ; <i42*> [#uses=1]
-@i42_s = external global i42 ; <i42*> [#uses=1]
-@i43_l = external global i43 ; <i43*> [#uses=1]
-@i43_s = external global i43 ; <i43*> [#uses=1]
-@i44_l = external global i44 ; <i44*> [#uses=1]
-@i44_s = external global i44 ; <i44*> [#uses=1]
-@i45_l = external global i45 ; <i45*> [#uses=1]
-@i45_s = external global i45 ; <i45*> [#uses=1]
-@i46_l = external global i46 ; <i46*> [#uses=1]
-@i46_s = external global i46 ; <i46*> [#uses=1]
-@i47_l = external global i47 ; <i47*> [#uses=1]
-@i47_s = external global i47 ; <i47*> [#uses=1]
-@i48_l = external global i48 ; <i48*> [#uses=1]
-@i48_s = external global i48 ; <i48*> [#uses=1]
-@i49_l = external global i49 ; <i49*> [#uses=1]
-@i49_s = external global i49 ; <i49*> [#uses=1]
-@i50_l = external global i50 ; <i50*> [#uses=1]
-@i50_s = external global i50 ; <i50*> [#uses=1]
-@i51_l = external global i51 ; <i51*> [#uses=1]
-@i51_s = external global i51 ; <i51*> [#uses=1]
-@i52_l = external global i52 ; <i52*> [#uses=1]
-@i52_s = external global i52 ; <i52*> [#uses=1]
-@i53_l = external global i53 ; <i53*> [#uses=1]
-@i53_s = external global i53 ; <i53*> [#uses=1]
-@i54_l = external global i54 ; <i54*> [#uses=1]
-@i54_s = external global i54 ; <i54*> [#uses=1]
-@i55_l = external global i55 ; <i55*> [#uses=1]
-@i55_s = external global i55 ; <i55*> [#uses=1]
-@i56_l = external global i56 ; <i56*> [#uses=1]
-@i56_s = external global i56 ; <i56*> [#uses=1]
-@i57_l = external global i57 ; <i57*> [#uses=1]
-@i57_s = external global i57 ; <i57*> [#uses=1]
-@i58_l = external global i58 ; <i58*> [#uses=1]
-@i58_s = external global i58 ; <i58*> [#uses=1]
-@i59_l = external global i59 ; <i59*> [#uses=1]
-@i59_s = external global i59 ; <i59*> [#uses=1]
-@i60_l = external global i60 ; <i60*> [#uses=1]
-@i60_s = external global i60 ; <i60*> [#uses=1]
-@i61_l = external global i61 ; <i61*> [#uses=1]
-@i61_s = external global i61 ; <i61*> [#uses=1]
-@i62_l = external global i62 ; <i62*> [#uses=1]
-@i62_s = external global i62 ; <i62*> [#uses=1]
-@i63_l = external global i63 ; <i63*> [#uses=1]
-@i63_s = external global i63 ; <i63*> [#uses=1]
-@i64_l = external global i64 ; <i64*> [#uses=1]
-@i64_s = external global i64 ; <i64*> [#uses=1]
+@i1_l = external global i1 ; <ptr> [#uses=1]
+@i1_s = external global i1 ; <ptr> [#uses=1]
+@i2_l = external global i2 ; <ptr> [#uses=1]
+@i2_s = external global i2 ; <ptr> [#uses=1]
+@i3_l = external global i3 ; <ptr> [#uses=1]
+@i3_s = external global i3 ; <ptr> [#uses=1]
+@i4_l = external global i4 ; <ptr> [#uses=1]
+@i4_s = external global i4 ; <ptr> [#uses=1]
+@i5_l = external global i5 ; <ptr> [#uses=1]
+@i5_s = external global i5 ; <ptr> [#uses=1]
+@i6_l = external global i6 ; <ptr> [#uses=1]
+@i6_s = external global i6 ; <ptr> [#uses=1]
+@i7_l = external global i7 ; <ptr> [#uses=1]
+@i7_s = external global i7 ; <ptr> [#uses=1]
+@i8_l = external global i8 ; <ptr> [#uses=1]
+@i8_s = external global i8 ; <ptr> [#uses=1]
+@i9_l = external global i9 ; <ptr> [#uses=1]
+@i9_s = external global i9 ; <ptr> [#uses=1]
+@i10_l = external global i10 ; <ptr> [#uses=1]
+@i10_s = external global i10 ; <ptr> [#uses=1]
+@i11_l = external global i11 ; <ptr> [#uses=1]
+@i11_s = external global i11 ; <ptr> [#uses=1]
+@i12_l = external global i12 ; <ptr> [#uses=1]
+@i12_s = external global i12 ; <ptr> [#uses=1]
+@i13_l = external global i13 ; <ptr> [#uses=1]
+@i13_s = external global i13 ; <ptr> [#uses=1]
+@i14_l = external global i14 ; <ptr> [#uses=1]
+@i14_s = external global i14 ; <ptr> [#uses=1]
+@i15_l = external global i15 ; <ptr> [#uses=1]
+@i15_s = external global i15 ; <ptr> [#uses=1]
+@i16_l = external global i16 ; <ptr> [#uses=1]
+@i16_s = external global i16 ; <ptr> [#uses=1]
+@i17_l = external global i17 ; <ptr> [#uses=1]
+@i17_s = external global i17 ; <ptr> [#uses=1]
+@i18_l = external global i18 ; <ptr> [#uses=1]
+@i18_s = external global i18 ; <ptr> [#uses=1]
+@i19_l = external global i19 ; <ptr> [#uses=1]
+@i19_s = external global i19 ; <ptr> [#uses=1]
+@i20_l = external global i20 ; <ptr> [#uses=1]
+@i20_s = external global i20 ; <ptr> [#uses=1]
+@i21_l = external global i21 ; <ptr> [#uses=1]
+@i21_s = external global i21 ; <ptr> [#uses=1]
+@i22_l = external global i22 ; <ptr> [#uses=1]
+@i22_s = external global i22 ; <ptr> [#uses=1]
+@i23_l = external global i23 ; <ptr> [#uses=1]
+@i23_s = external global i23 ; <ptr> [#uses=1]
+@i24_l = external global i24 ; <ptr> [#uses=1]
+@i24_s = external global i24 ; <ptr> [#uses=1]
+@i25_l = external global i25 ; <ptr> [#uses=1]
+@i25_s = external global i25 ; <ptr> [#uses=1]
+@i26_l = external global i26 ; <ptr> [#uses=1]
+@i26_s = external global i26 ; <ptr> [#uses=1]
+@i27_l = external global i27 ; <ptr> [#uses=1]
+@i27_s = external global i27 ; <ptr> [#uses=1]
+@i28_l = external global i28 ; <ptr> [#uses=1]
+@i28_s = external global i28 ; <ptr> [#uses=1]
+@i29_l = external global i29 ; <ptr> [#uses=1]
+@i29_s = external global i29 ; <ptr> [#uses=1]
+@i30_l = external global i30 ; <ptr> [#uses=1]
+@i30_s = external global i30 ; <ptr> [#uses=1]
+@i31_l = external global i31 ; <ptr> [#uses=1]
+@i31_s = external global i31 ; <ptr> [#uses=1]
+@i32_l = external global i32 ; <ptr> [#uses=1]
+@i32_s = external global i32 ; <ptr> [#uses=1]
+@i33_l = external global i33 ; <ptr> [#uses=1]
+@i33_s = external global i33 ; <ptr> [#uses=1]
+@i34_l = external global i34 ; <ptr> [#uses=1]
+@i34_s = external global i34 ; <ptr> [#uses=1]
+@i35_l = external global i35 ; <ptr> [#uses=1]
+@i35_s = external global i35 ; <ptr> [#uses=1]
+@i36_l = external global i36 ; <ptr> [#uses=1]
+@i36_s = external global i36 ; <ptr> [#uses=1]
+@i37_l = external global i37 ; <ptr> [#uses=1]
+@i37_s = external global i37 ; <ptr> [#uses=1]
+@i38_l = external global i38 ; <ptr> [#uses=1]
+@i38_s = external global i38 ; <ptr> [#uses=1]
+@i39_l = external global i39 ; <ptr> [#uses=1]
+@i39_s = external global i39 ; <ptr> [#uses=1]
+@i40_l = external global i40 ; <ptr> [#uses=1]
+@i40_s = external global i40 ; <ptr> [#uses=1]
+@i41_l = external global i41 ; <ptr> [#uses=1]
+@i41_s = external global i41 ; <ptr> [#uses=1]
+@i42_l = external global i42 ; <ptr> [#uses=1]
+@i42_s = external global i42 ; <ptr> [#uses=1]
+@i43_l = external global i43 ; <ptr> [#uses=1]
+@i43_s = external global i43 ; <ptr> [#uses=1]
+@i44_l = external global i44 ; <ptr> [#uses=1]
+@i44_s = external global i44 ; <ptr> [#uses=1]
+@i45_l = external global i45 ; <ptr> [#uses=1]
+@i45_s = external global i45 ; <ptr> [#uses=1]
+@i46_l = external global i46 ; <ptr> [#uses=1]
+@i46_s = external global i46 ; <ptr> [#uses=1]
+@i47_l = external global i47 ; <ptr> [#uses=1]
+@i47_s = external global i47 ; <ptr> [#uses=1]
+@i48_l = external global i48 ; <ptr> [#uses=1]
+@i48_s = external global i48 ; <ptr> [#uses=1]
+@i49_l = external global i49 ; <ptr> [#uses=1]
+@i49_s = external global i49 ; <ptr> [#uses=1]
+@i50_l = external global i50 ; <ptr> [#uses=1]
+@i50_s = external global i50 ; <ptr> [#uses=1]
+@i51_l = external global i51 ; <ptr> [#uses=1]
+@i51_s = external global i51 ; <ptr> [#uses=1]
+@i52_l = external global i52 ; <ptr> [#uses=1]
+@i52_s = external global i52 ; <ptr> [#uses=1]
+@i53_l = external global i53 ; <ptr> [#uses=1]
+@i53_s = external global i53 ; <ptr> [#uses=1]
+@i54_l = external global i54 ; <ptr> [#uses=1]
+@i54_s = external global i54 ; <ptr> [#uses=1]
+@i55_l = external global i55 ; <ptr> [#uses=1]
+@i55_s = external global i55 ; <ptr> [#uses=1]
+@i56_l = external global i56 ; <ptr> [#uses=1]
+@i56_s = external global i56 ; <ptr> [#uses=1]
+@i57_l = external global i57 ; <ptr> [#uses=1]
+@i57_s = external global i57 ; <ptr> [#uses=1]
+@i58_l = external global i58 ; <ptr> [#uses=1]
+@i58_s = external global i58 ; <ptr> [#uses=1]
+@i59_l = external global i59 ; <ptr> [#uses=1]
+@i59_s = external global i59 ; <ptr> [#uses=1]
+@i60_l = external global i60 ; <ptr> [#uses=1]
+@i60_s = external global i60 ; <ptr> [#uses=1]
+@i61_l = external global i61 ; <ptr> [#uses=1]
+@i61_s = external global i61 ; <ptr> [#uses=1]
+@i62_l = external global i62 ; <ptr> [#uses=1]
+@i62_s = external global i62 ; <ptr> [#uses=1]
+@i63_l = external global i63 ; <ptr> [#uses=1]
+@i63_s = external global i63 ; <ptr> [#uses=1]
+@i64_l = external global i64 ; <ptr> [#uses=1]
+@i64_s = external global i64 ; <ptr> [#uses=1]
define void @i1_ls() nounwind {
- %tmp = load i1, i1* @i1_l ; <i1> [#uses=1]
- store i1 %tmp, i1* @i1_s
+ %tmp = load i1, ptr @i1_l ; <i1> [#uses=1]
+ store i1 %tmp, ptr @i1_s
ret void
}
define void @i2_ls() nounwind {
- %tmp = load i2, i2* @i2_l ; <i2> [#uses=1]
- store i2 %tmp, i2* @i2_s
+ %tmp = load i2, ptr @i2_l ; <i2> [#uses=1]
+ store i2 %tmp, ptr @i2_s
ret void
}
define void @i3_ls() nounwind {
- %tmp = load i3, i3* @i3_l ; <i3> [#uses=1]
- store i3 %tmp, i3* @i3_s
+ %tmp = load i3, ptr @i3_l ; <i3> [#uses=1]
+ store i3 %tmp, ptr @i3_s
ret void
}
define void @i4_ls() nounwind {
- %tmp = load i4, i4* @i4_l ; <i4> [#uses=1]
- store i4 %tmp, i4* @i4_s
+ %tmp = load i4, ptr @i4_l ; <i4> [#uses=1]
+ store i4 %tmp, ptr @i4_s
ret void
}
define void @i5_ls() nounwind {
- %tmp = load i5, i5* @i5_l ; <i5> [#uses=1]
- store i5 %tmp, i5* @i5_s
+ %tmp = load i5, ptr @i5_l ; <i5> [#uses=1]
+ store i5 %tmp, ptr @i5_s
ret void
}
define void @i6_ls() nounwind {
- %tmp = load i6, i6* @i6_l ; <i6> [#uses=1]
- store i6 %tmp, i6* @i6_s
+ %tmp = load i6, ptr @i6_l ; <i6> [#uses=1]
+ store i6 %tmp, ptr @i6_s
ret void
}
define void @i7_ls() nounwind {
- %tmp = load i7, i7* @i7_l ; <i7> [#uses=1]
- store i7 %tmp, i7* @i7_s
+ %tmp = load i7, ptr @i7_l ; <i7> [#uses=1]
+ store i7 %tmp, ptr @i7_s
ret void
}
define void @i8_ls() nounwind {
- %tmp = load i8, i8* @i8_l ; <i8> [#uses=1]
- store i8 %tmp, i8* @i8_s
+ %tmp = load i8, ptr @i8_l ; <i8> [#uses=1]
+ store i8 %tmp, ptr @i8_s
ret void
}
define void @i9_ls() nounwind {
- %tmp = load i9, i9* @i9_l ; <i9> [#uses=1]
- store i9 %tmp, i9* @i9_s
+ %tmp = load i9, ptr @i9_l ; <i9> [#uses=1]
+ store i9 %tmp, ptr @i9_s
ret void
}
define void @i10_ls() nounwind {
- %tmp = load i10, i10* @i10_l ; <i10> [#uses=1]
- store i10 %tmp, i10* @i10_s
+ %tmp = load i10, ptr @i10_l ; <i10> [#uses=1]
+ store i10 %tmp, ptr @i10_s
ret void
}
define void @i11_ls() nounwind {
- %tmp = load i11, i11* @i11_l ; <i11> [#uses=1]
- store i11 %tmp, i11* @i11_s
+ %tmp = load i11, ptr @i11_l ; <i11> [#uses=1]
+ store i11 %tmp, ptr @i11_s
ret void
}
define void @i12_ls() nounwind {
- %tmp = load i12, i12* @i12_l ; <i12> [#uses=1]
- store i12 %tmp, i12* @i12_s
+ %tmp = load i12, ptr @i12_l ; <i12> [#uses=1]
+ store i12 %tmp, ptr @i12_s
ret void
}
define void @i13_ls() nounwind {
- %tmp = load i13, i13* @i13_l ; <i13> [#uses=1]
- store i13 %tmp, i13* @i13_s
+ %tmp = load i13, ptr @i13_l ; <i13> [#uses=1]
+ store i13 %tmp, ptr @i13_s
ret void
}
define void @i14_ls() nounwind {
- %tmp = load i14, i14* @i14_l ; <i14> [#uses=1]
- store i14 %tmp, i14* @i14_s
+ %tmp = load i14, ptr @i14_l ; <i14> [#uses=1]
+ store i14 %tmp, ptr @i14_s
ret void
}
define void @i15_ls() nounwind {
- %tmp = load i15, i15* @i15_l ; <i15> [#uses=1]
- store i15 %tmp, i15* @i15_s
+ %tmp = load i15, ptr @i15_l ; <i15> [#uses=1]
+ store i15 %tmp, ptr @i15_s
ret void
}
define void @i16_ls() nounwind {
- %tmp = load i16, i16* @i16_l ; <i16> [#uses=1]
- store i16 %tmp, i16* @i16_s
+ %tmp = load i16, ptr @i16_l ; <i16> [#uses=1]
+ store i16 %tmp, ptr @i16_s
ret void
}
define void @i17_ls() nounwind {
- %tmp = load i17, i17* @i17_l ; <i17> [#uses=1]
- store i17 %tmp, i17* @i17_s
+ %tmp = load i17, ptr @i17_l ; <i17> [#uses=1]
+ store i17 %tmp, ptr @i17_s
ret void
}
define void @i18_ls() nounwind {
- %tmp = load i18, i18* @i18_l ; <i18> [#uses=1]
- store i18 %tmp, i18* @i18_s
+ %tmp = load i18, ptr @i18_l ; <i18> [#uses=1]
+ store i18 %tmp, ptr @i18_s
ret void
}
define void @i19_ls() nounwind {
- %tmp = load i19, i19* @i19_l ; <i19> [#uses=1]
- store i19 %tmp, i19* @i19_s
+ %tmp = load i19, ptr @i19_l ; <i19> [#uses=1]
+ store i19 %tmp, ptr @i19_s
ret void
}
define void @i20_ls() nounwind {
- %tmp = load i20, i20* @i20_l ; <i20> [#uses=1]
- store i20 %tmp, i20* @i20_s
+ %tmp = load i20, ptr @i20_l ; <i20> [#uses=1]
+ store i20 %tmp, ptr @i20_s
ret void
}
define void @i21_ls() nounwind {
- %tmp = load i21, i21* @i21_l ; <i21> [#uses=1]
- store i21 %tmp, i21* @i21_s
+ %tmp = load i21, ptr @i21_l ; <i21> [#uses=1]
+ store i21 %tmp, ptr @i21_s
ret void
}
define void @i22_ls() nounwind {
- %tmp = load i22, i22* @i22_l ; <i22> [#uses=1]
- store i22 %tmp, i22* @i22_s
+ %tmp = load i22, ptr @i22_l ; <i22> [#uses=1]
+ store i22 %tmp, ptr @i22_s
ret void
}
define void @i23_ls() nounwind {
- %tmp = load i23, i23* @i23_l ; <i23> [#uses=1]
- store i23 %tmp, i23* @i23_s
+ %tmp = load i23, ptr @i23_l ; <i23> [#uses=1]
+ store i23 %tmp, ptr @i23_s
ret void
}
define void @i24_ls() nounwind {
- %tmp = load i24, i24* @i24_l ; <i24> [#uses=1]
- store i24 %tmp, i24* @i24_s
+ %tmp = load i24, ptr @i24_l ; <i24> [#uses=1]
+ store i24 %tmp, ptr @i24_s
ret void
}
define void @i25_ls() nounwind {
- %tmp = load i25, i25* @i25_l ; <i25> [#uses=1]
- store i25 %tmp, i25* @i25_s
+ %tmp = load i25, ptr @i25_l ; <i25> [#uses=1]
+ store i25 %tmp, ptr @i25_s
ret void
}
define void @i26_ls() nounwind {
- %tmp = load i26, i26* @i26_l ; <i26> [#uses=1]
- store i26 %tmp, i26* @i26_s
+ %tmp = load i26, ptr @i26_l ; <i26> [#uses=1]
+ store i26 %tmp, ptr @i26_s
ret void
}
define void @i27_ls() nounwind {
- %tmp = load i27, i27* @i27_l ; <i27> [#uses=1]
- store i27 %tmp, i27* @i27_s
+ %tmp = load i27, ptr @i27_l ; <i27> [#uses=1]
+ store i27 %tmp, ptr @i27_s
ret void
}
define void @i28_ls() nounwind {
- %tmp = load i28, i28* @i28_l ; <i28> [#uses=1]
- store i28 %tmp, i28* @i28_s
+ %tmp = load i28, ptr @i28_l ; <i28> [#uses=1]
+ store i28 %tmp, ptr @i28_s
ret void
}
define void @i29_ls() nounwind {
- %tmp = load i29, i29* @i29_l ; <i29> [#uses=1]
- store i29 %tmp, i29* @i29_s
+ %tmp = load i29, ptr @i29_l ; <i29> [#uses=1]
+ store i29 %tmp, ptr @i29_s
ret void
}
define void @i30_ls() nounwind {
- %tmp = load i30, i30* @i30_l ; <i30> [#uses=1]
- store i30 %tmp, i30* @i30_s
+ %tmp = load i30, ptr @i30_l ; <i30> [#uses=1]
+ store i30 %tmp, ptr @i30_s
ret void
}
define void @i31_ls() nounwind {
- %tmp = load i31, i31* @i31_l ; <i31> [#uses=1]
- store i31 %tmp, i31* @i31_s
+ %tmp = load i31, ptr @i31_l ; <i31> [#uses=1]
+ store i31 %tmp, ptr @i31_s
ret void
}
define void @i32_ls() nounwind {
- %tmp = load i32, i32* @i32_l ; <i32> [#uses=1]
- store i32 %tmp, i32* @i32_s
+ %tmp = load i32, ptr @i32_l ; <i32> [#uses=1]
+ store i32 %tmp, ptr @i32_s
ret void
}
define void @i33_ls() nounwind {
- %tmp = load i33, i33* @i33_l ; <i33> [#uses=1]
- store i33 %tmp, i33* @i33_s
+ %tmp = load i33, ptr @i33_l ; <i33> [#uses=1]
+ store i33 %tmp, ptr @i33_s
ret void
}
define void @i34_ls() nounwind {
- %tmp = load i34, i34* @i34_l ; <i34> [#uses=1]
- store i34 %tmp, i34* @i34_s
+ %tmp = load i34, ptr @i34_l ; <i34> [#uses=1]
+ store i34 %tmp, ptr @i34_s
ret void
}
define void @i35_ls() nounwind {
- %tmp = load i35, i35* @i35_l ; <i35> [#uses=1]
- store i35 %tmp, i35* @i35_s
+ %tmp = load i35, ptr @i35_l ; <i35> [#uses=1]
+ store i35 %tmp, ptr @i35_s
ret void
}
define void @i36_ls() nounwind {
- %tmp = load i36, i36* @i36_l ; <i36> [#uses=1]
- store i36 %tmp, i36* @i36_s
+ %tmp = load i36, ptr @i36_l ; <i36> [#uses=1]
+ store i36 %tmp, ptr @i36_s
ret void
}
define void @i37_ls() nounwind {
- %tmp = load i37, i37* @i37_l ; <i37> [#uses=1]
- store i37 %tmp, i37* @i37_s
+ %tmp = load i37, ptr @i37_l ; <i37> [#uses=1]
+ store i37 %tmp, ptr @i37_s
ret void
}
define void @i38_ls() nounwind {
- %tmp = load i38, i38* @i38_l ; <i38> [#uses=1]
- store i38 %tmp, i38* @i38_s
+ %tmp = load i38, ptr @i38_l ; <i38> [#uses=1]
+ store i38 %tmp, ptr @i38_s
ret void
}
define void @i39_ls() nounwind {
- %tmp = load i39, i39* @i39_l ; <i39> [#uses=1]
- store i39 %tmp, i39* @i39_s
+ %tmp = load i39, ptr @i39_l ; <i39> [#uses=1]
+ store i39 %tmp, ptr @i39_s
ret void
}
define void @i40_ls() nounwind {
- %tmp = load i40, i40* @i40_l ; <i40> [#uses=1]
- store i40 %tmp, i40* @i40_s
+ %tmp = load i40, ptr @i40_l ; <i40> [#uses=1]
+ store i40 %tmp, ptr @i40_s
ret void
}
define void @i41_ls() nounwind {
- %tmp = load i41, i41* @i41_l ; <i41> [#uses=1]
- store i41 %tmp, i41* @i41_s
+ %tmp = load i41, ptr @i41_l ; <i41> [#uses=1]
+ store i41 %tmp, ptr @i41_s
ret void
}
define void @i42_ls() nounwind {
- %tmp = load i42, i42* @i42_l ; <i42> [#uses=1]
- store i42 %tmp, i42* @i42_s
+ %tmp = load i42, ptr @i42_l ; <i42> [#uses=1]
+ store i42 %tmp, ptr @i42_s
ret void
}
define void @i43_ls() nounwind {
- %tmp = load i43, i43* @i43_l ; <i43> [#uses=1]
- store i43 %tmp, i43* @i43_s
+ %tmp = load i43, ptr @i43_l ; <i43> [#uses=1]
+ store i43 %tmp, ptr @i43_s
ret void
}
define void @i44_ls() nounwind {
- %tmp = load i44, i44* @i44_l ; <i44> [#uses=1]
- store i44 %tmp, i44* @i44_s
+ %tmp = load i44, ptr @i44_l ; <i44> [#uses=1]
+ store i44 %tmp, ptr @i44_s
ret void
}
define void @i45_ls() nounwind {
- %tmp = load i45, i45* @i45_l ; <i45> [#uses=1]
- store i45 %tmp, i45* @i45_s
+ %tmp = load i45, ptr @i45_l ; <i45> [#uses=1]
+ store i45 %tmp, ptr @i45_s
ret void
}
define void @i46_ls() nounwind {
- %tmp = load i46, i46* @i46_l ; <i46> [#uses=1]
- store i46 %tmp, i46* @i46_s
+ %tmp = load i46, ptr @i46_l ; <i46> [#uses=1]
+ store i46 %tmp, ptr @i46_s
ret void
}
define void @i47_ls() nounwind {
- %tmp = load i47, i47* @i47_l ; <i47> [#uses=1]
- store i47 %tmp, i47* @i47_s
+ %tmp = load i47, ptr @i47_l ; <i47> [#uses=1]
+ store i47 %tmp, ptr @i47_s
ret void
}
define void @i48_ls() nounwind {
- %tmp = load i48, i48* @i48_l ; <i48> [#uses=1]
- store i48 %tmp, i48* @i48_s
+ %tmp = load i48, ptr @i48_l ; <i48> [#uses=1]
+ store i48 %tmp, ptr @i48_s
ret void
}
define void @i49_ls() nounwind {
- %tmp = load i49, i49* @i49_l ; <i49> [#uses=1]
- store i49 %tmp, i49* @i49_s
+ %tmp = load i49, ptr @i49_l ; <i49> [#uses=1]
+ store i49 %tmp, ptr @i49_s
ret void
}
define void @i50_ls() nounwind {
- %tmp = load i50, i50* @i50_l ; <i50> [#uses=1]
- store i50 %tmp, i50* @i50_s
+ %tmp = load i50, ptr @i50_l ; <i50> [#uses=1]
+ store i50 %tmp, ptr @i50_s
ret void
}
define void @i51_ls() nounwind {
- %tmp = load i51, i51* @i51_l ; <i51> [#uses=1]
- store i51 %tmp, i51* @i51_s
+ %tmp = load i51, ptr @i51_l ; <i51> [#uses=1]
+ store i51 %tmp, ptr @i51_s
ret void
}
define void @i52_ls() nounwind {
- %tmp = load i52, i52* @i52_l ; <i52> [#uses=1]
- store i52 %tmp, i52* @i52_s
+ %tmp = load i52, ptr @i52_l ; <i52> [#uses=1]
+ store i52 %tmp, ptr @i52_s
ret void
}
define void @i53_ls() nounwind {
- %tmp = load i53, i53* @i53_l ; <i53> [#uses=1]
- store i53 %tmp, i53* @i53_s
+ %tmp = load i53, ptr @i53_l ; <i53> [#uses=1]
+ store i53 %tmp, ptr @i53_s
ret void
}
define void @i54_ls() nounwind {
- %tmp = load i54, i54* @i54_l ; <i54> [#uses=1]
- store i54 %tmp, i54* @i54_s
+ %tmp = load i54, ptr @i54_l ; <i54> [#uses=1]
+ store i54 %tmp, ptr @i54_s
ret void
}
define void @i55_ls() nounwind {
- %tmp = load i55, i55* @i55_l ; <i55> [#uses=1]
- store i55 %tmp, i55* @i55_s
+ %tmp = load i55, ptr @i55_l ; <i55> [#uses=1]
+ store i55 %tmp, ptr @i55_s
ret void
}
define void @i56_ls() nounwind {
- %tmp = load i56, i56* @i56_l ; <i56> [#uses=1]
- store i56 %tmp, i56* @i56_s
+ %tmp = load i56, ptr @i56_l ; <i56> [#uses=1]
+ store i56 %tmp, ptr @i56_s
ret void
}
define void @i57_ls() nounwind {
- %tmp = load i57, i57* @i57_l ; <i57> [#uses=1]
- store i57 %tmp, i57* @i57_s
+ %tmp = load i57, ptr @i57_l ; <i57> [#uses=1]
+ store i57 %tmp, ptr @i57_s
ret void
}
define void @i58_ls() nounwind {
- %tmp = load i58, i58* @i58_l ; <i58> [#uses=1]
- store i58 %tmp, i58* @i58_s
+ %tmp = load i58, ptr @i58_l ; <i58> [#uses=1]
+ store i58 %tmp, ptr @i58_s
ret void
}
define void @i59_ls() nounwind {
- %tmp = load i59, i59* @i59_l ; <i59> [#uses=1]
- store i59 %tmp, i59* @i59_s
+ %tmp = load i59, ptr @i59_l ; <i59> [#uses=1]
+ store i59 %tmp, ptr @i59_s
ret void
}
define void @i60_ls() nounwind {
- %tmp = load i60, i60* @i60_l ; <i60> [#uses=1]
- store i60 %tmp, i60* @i60_s
+ %tmp = load i60, ptr @i60_l ; <i60> [#uses=1]
+ store i60 %tmp, ptr @i60_s
ret void
}
define void @i61_ls() nounwind {
- %tmp = load i61, i61* @i61_l ; <i61> [#uses=1]
- store i61 %tmp, i61* @i61_s
+ %tmp = load i61, ptr @i61_l ; <i61> [#uses=1]
+ store i61 %tmp, ptr @i61_s
ret void
}
define void @i62_ls() nounwind {
- %tmp = load i62, i62* @i62_l ; <i62> [#uses=1]
- store i62 %tmp, i62* @i62_s
+ %tmp = load i62, ptr @i62_l ; <i62> [#uses=1]
+ store i62 %tmp, ptr @i62_s
ret void
}
define void @i63_ls() nounwind {
- %tmp = load i63, i63* @i63_l ; <i63> [#uses=1]
- store i63 %tmp, i63* @i63_s
+ %tmp = load i63, ptr @i63_l ; <i63> [#uses=1]
+ store i63 %tmp, ptr @i63_s
ret void
}
define void @i64_ls() nounwind {
- %tmp = load i64, i64* @i64_l ; <i64> [#uses=1]
- store i64 %tmp, i64* @i64_s
+ %tmp = load i64, ptr @i64_l ; <i64> [#uses=1]
+ store i64 %tmp, ptr @i64_s
ret void
}
; RUN: llc -march=nvptx < %s > %t
; RUN: llc -march=nvptx64 < %s > %t
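; Check that an integer argument of every width from i1 through i64 can be
; received and stored to a global without error.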
-@i1_s = external global i1 ; <i1*> [#uses=1]
-@i2_s = external global i2 ; <i2*> [#uses=1]
-@i3_s = external global i3 ; <i3*> [#uses=1]
-@i4_s = external global i4 ; <i4*> [#uses=1]
-@i5_s = external global i5 ; <i5*> [#uses=1]
-@i6_s = external global i6 ; <i6*> [#uses=1]
-@i7_s = external global i7 ; <i7*> [#uses=1]
-@i8_s = external global i8 ; <i8*> [#uses=1]
-@i9_s = external global i9 ; <i9*> [#uses=1]
-@i10_s = external global i10 ; <i10*> [#uses=1]
-@i11_s = external global i11 ; <i11*> [#uses=1]
-@i12_s = external global i12 ; <i12*> [#uses=1]
-@i13_s = external global i13 ; <i13*> [#uses=1]
-@i14_s = external global i14 ; <i14*> [#uses=1]
-@i15_s = external global i15 ; <i15*> [#uses=1]
-@i16_s = external global i16 ; <i16*> [#uses=1]
-@i17_s = external global i17 ; <i17*> [#uses=1]
-@i18_s = external global i18 ; <i18*> [#uses=1]
-@i19_s = external global i19 ; <i19*> [#uses=1]
-@i20_s = external global i20 ; <i20*> [#uses=1]
-@i21_s = external global i21 ; <i21*> [#uses=1]
-@i22_s = external global i22 ; <i22*> [#uses=1]
-@i23_s = external global i23 ; <i23*> [#uses=1]
-@i24_s = external global i24 ; <i24*> [#uses=1]
-@i25_s = external global i25 ; <i25*> [#uses=1]
-@i26_s = external global i26 ; <i26*> [#uses=1]
-@i27_s = external global i27 ; <i27*> [#uses=1]
-@i28_s = external global i28 ; <i28*> [#uses=1]
-@i29_s = external global i29 ; <i29*> [#uses=1]
-@i30_s = external global i30 ; <i30*> [#uses=1]
-@i31_s = external global i31 ; <i31*> [#uses=1]
-@i32_s = external global i32 ; <i32*> [#uses=1]
-@i33_s = external global i33 ; <i33*> [#uses=1]
-@i34_s = external global i34 ; <i34*> [#uses=1]
-@i35_s = external global i35 ; <i35*> [#uses=1]
-@i36_s = external global i36 ; <i36*> [#uses=1]
-@i37_s = external global i37 ; <i37*> [#uses=1]
-@i38_s = external global i38 ; <i38*> [#uses=1]
-@i39_s = external global i39 ; <i39*> [#uses=1]
-@i40_s = external global i40 ; <i40*> [#uses=1]
-@i41_s = external global i41 ; <i41*> [#uses=1]
-@i42_s = external global i42 ; <i42*> [#uses=1]
-@i43_s = external global i43 ; <i43*> [#uses=1]
-@i44_s = external global i44 ; <i44*> [#uses=1]
-@i45_s = external global i45 ; <i45*> [#uses=1]
-@i46_s = external global i46 ; <i46*> [#uses=1]
-@i47_s = external global i47 ; <i47*> [#uses=1]
-@i48_s = external global i48 ; <i48*> [#uses=1]
-@i49_s = external global i49 ; <i49*> [#uses=1]
-@i50_s = external global i50 ; <i50*> [#uses=1]
-@i51_s = external global i51 ; <i51*> [#uses=1]
-@i52_s = external global i52 ; <i52*> [#uses=1]
-@i53_s = external global i53 ; <i53*> [#uses=1]
-@i54_s = external global i54 ; <i54*> [#uses=1]
-@i55_s = external global i55 ; <i55*> [#uses=1]
-@i56_s = external global i56 ; <i56*> [#uses=1]
-@i57_s = external global i57 ; <i57*> [#uses=1]
-@i58_s = external global i58 ; <i58*> [#uses=1]
-@i59_s = external global i59 ; <i59*> [#uses=1]
-@i60_s = external global i60 ; <i60*> [#uses=1]
-@i61_s = external global i61 ; <i61*> [#uses=1]
-@i62_s = external global i62 ; <i62*> [#uses=1]
-@i63_s = external global i63 ; <i63*> [#uses=1]
-@i64_s = external global i64 ; <i64*> [#uses=1]
+@i1_s = external global i1 ; <ptr> [#uses=1]
+@i2_s = external global i2 ; <ptr> [#uses=1]
+@i3_s = external global i3 ; <ptr> [#uses=1]
+@i4_s = external global i4 ; <ptr> [#uses=1]
+@i5_s = external global i5 ; <ptr> [#uses=1]
+@i6_s = external global i6 ; <ptr> [#uses=1]
+@i7_s = external global i7 ; <ptr> [#uses=1]
+@i8_s = external global i8 ; <ptr> [#uses=1]
+@i9_s = external global i9 ; <ptr> [#uses=1]
+@i10_s = external global i10 ; <ptr> [#uses=1]
+@i11_s = external global i11 ; <ptr> [#uses=1]
+@i12_s = external global i12 ; <ptr> [#uses=1]
+@i13_s = external global i13 ; <ptr> [#uses=1]
+@i14_s = external global i14 ; <ptr> [#uses=1]
+@i15_s = external global i15 ; <ptr> [#uses=1]
+@i16_s = external global i16 ; <ptr> [#uses=1]
+@i17_s = external global i17 ; <ptr> [#uses=1]
+@i18_s = external global i18 ; <ptr> [#uses=1]
+@i19_s = external global i19 ; <ptr> [#uses=1]
+@i20_s = external global i20 ; <ptr> [#uses=1]
+@i21_s = external global i21 ; <ptr> [#uses=1]
+@i22_s = external global i22 ; <ptr> [#uses=1]
+@i23_s = external global i23 ; <ptr> [#uses=1]
+@i24_s = external global i24 ; <ptr> [#uses=1]
+@i25_s = external global i25 ; <ptr> [#uses=1]
+@i26_s = external global i26 ; <ptr> [#uses=1]
+@i27_s = external global i27 ; <ptr> [#uses=1]
+@i28_s = external global i28 ; <ptr> [#uses=1]
+@i29_s = external global i29 ; <ptr> [#uses=1]
+@i30_s = external global i30 ; <ptr> [#uses=1]
+@i31_s = external global i31 ; <ptr> [#uses=1]
+@i32_s = external global i32 ; <ptr> [#uses=1]
+@i33_s = external global i33 ; <ptr> [#uses=1]
+@i34_s = external global i34 ; <ptr> [#uses=1]
+@i35_s = external global i35 ; <ptr> [#uses=1]
+@i36_s = external global i36 ; <ptr> [#uses=1]
+@i37_s = external global i37 ; <ptr> [#uses=1]
+@i38_s = external global i38 ; <ptr> [#uses=1]
+@i39_s = external global i39 ; <ptr> [#uses=1]
+@i40_s = external global i40 ; <ptr> [#uses=1]
+@i41_s = external global i41 ; <ptr> [#uses=1]
+@i42_s = external global i42 ; <ptr> [#uses=1]
+@i43_s = external global i43 ; <ptr> [#uses=1]
+@i44_s = external global i44 ; <ptr> [#uses=1]
+@i45_s = external global i45 ; <ptr> [#uses=1]
+@i46_s = external global i46 ; <ptr> [#uses=1]
+@i47_s = external global i47 ; <ptr> [#uses=1]
+@i48_s = external global i48 ; <ptr> [#uses=1]
+@i49_s = external global i49 ; <ptr> [#uses=1]
+@i50_s = external global i50 ; <ptr> [#uses=1]
+@i51_s = external global i51 ; <ptr> [#uses=1]
+@i52_s = external global i52 ; <ptr> [#uses=1]
+@i53_s = external global i53 ; <ptr> [#uses=1]
+@i54_s = external global i54 ; <ptr> [#uses=1]
+@i55_s = external global i55 ; <ptr> [#uses=1]
+@i56_s = external global i56 ; <ptr> [#uses=1]
+@i57_s = external global i57 ; <ptr> [#uses=1]
+@i58_s = external global i58 ; <ptr> [#uses=1]
+@i59_s = external global i59 ; <ptr> [#uses=1]
+@i60_s = external global i60 ; <ptr> [#uses=1]
+@i61_s = external global i61 ; <ptr> [#uses=1]
+@i62_s = external global i62 ; <ptr> [#uses=1]
+@i63_s = external global i63 ; <ptr> [#uses=1]
+@i64_s = external global i64 ; <ptr> [#uses=1]
define void @i1_ls(i1 %x) nounwind {
- store i1 %x, i1* @i1_s
+ store i1 %x, ptr @i1_s
ret void
}
define void @i2_ls(i2 %x) nounwind {
- store i2 %x, i2* @i2_s
+ store i2 %x, ptr @i2_s
ret void
}
define void @i3_ls(i3 %x) nounwind {
- store i3 %x, i3* @i3_s
+ store i3 %x, ptr @i3_s
ret void
}
define void @i4_ls(i4 %x) nounwind {
- store i4 %x, i4* @i4_s
+ store i4 %x, ptr @i4_s
ret void
}
define void @i5_ls(i5 %x) nounwind {
- store i5 %x, i5* @i5_s
+ store i5 %x, ptr @i5_s
ret void
}
define void @i6_ls(i6 %x) nounwind {
- store i6 %x, i6* @i6_s
+ store i6 %x, ptr @i6_s
ret void
}
define void @i7_ls(i7 %x) nounwind {
- store i7 %x, i7* @i7_s
+ store i7 %x, ptr @i7_s
ret void
}
define void @i8_ls(i8 %x) nounwind {
- store i8 %x, i8* @i8_s
+ store i8 %x, ptr @i8_s
ret void
}
define void @i9_ls(i9 %x) nounwind {
- store i9 %x, i9* @i9_s
+ store i9 %x, ptr @i9_s
ret void
}
define void @i10_ls(i10 %x) nounwind {
- store i10 %x, i10* @i10_s
+ store i10 %x, ptr @i10_s
ret void
}
define void @i11_ls(i11 %x) nounwind {
- store i11 %x, i11* @i11_s
+ store i11 %x, ptr @i11_s
ret void
}
define void @i12_ls(i12 %x) nounwind {
- store i12 %x, i12* @i12_s
+ store i12 %x, ptr @i12_s
ret void
}
define void @i13_ls(i13 %x) nounwind {
- store i13 %x, i13* @i13_s
+ store i13 %x, ptr @i13_s
ret void
}
define void @i14_ls(i14 %x) nounwind {
- store i14 %x, i14* @i14_s
+ store i14 %x, ptr @i14_s
ret void
}
define void @i15_ls(i15 %x) nounwind {
- store i15 %x, i15* @i15_s
+ store i15 %x, ptr @i15_s
ret void
}
define void @i16_ls(i16 %x) nounwind {
- store i16 %x, i16* @i16_s
+ store i16 %x, ptr @i16_s
ret void
}
define void @i17_ls(i17 %x) nounwind {
- store i17 %x, i17* @i17_s
+ store i17 %x, ptr @i17_s
ret void
}
define void @i18_ls(i18 %x) nounwind {
- store i18 %x, i18* @i18_s
+ store i18 %x, ptr @i18_s
ret void
}
define void @i19_ls(i19 %x) nounwind {
- store i19 %x, i19* @i19_s
+ store i19 %x, ptr @i19_s
ret void
}
define void @i20_ls(i20 %x) nounwind {
- store i20 %x, i20* @i20_s
+ store i20 %x, ptr @i20_s
ret void
}
define void @i21_ls(i21 %x) nounwind {
- store i21 %x, i21* @i21_s
+ store i21 %x, ptr @i21_s
ret void
}
define void @i22_ls(i22 %x) nounwind {
- store i22 %x, i22* @i22_s
+ store i22 %x, ptr @i22_s
ret void
}
define void @i23_ls(i23 %x) nounwind {
- store i23 %x, i23* @i23_s
+ store i23 %x, ptr @i23_s
ret void
}
define void @i24_ls(i24 %x) nounwind {
- store i24 %x, i24* @i24_s
+ store i24 %x, ptr @i24_s
ret void
}
define void @i25_ls(i25 %x) nounwind {
- store i25 %x, i25* @i25_s
+ store i25 %x, ptr @i25_s
ret void
}
define void @i26_ls(i26 %x) nounwind {
- store i26 %x, i26* @i26_s
+ store i26 %x, ptr @i26_s
ret void
}
define void @i27_ls(i27 %x) nounwind {
- store i27 %x, i27* @i27_s
+ store i27 %x, ptr @i27_s
ret void
}
define void @i28_ls(i28 %x) nounwind {
- store i28 %x, i28* @i28_s
+ store i28 %x, ptr @i28_s
ret void
}
define void @i29_ls(i29 %x) nounwind {
- store i29 %x, i29* @i29_s
+ store i29 %x, ptr @i29_s
ret void
}
define void @i30_ls(i30 %x) nounwind {
- store i30 %x, i30* @i30_s
+ store i30 %x, ptr @i30_s
ret void
}
define void @i31_ls(i31 %x) nounwind {
- store i31 %x, i31* @i31_s
+ store i31 %x, ptr @i31_s
ret void
}
define void @i32_ls(i32 %x) nounwind {
- store i32 %x, i32* @i32_s
+ store i32 %x, ptr @i32_s
ret void
}
define void @i33_ls(i33 %x) nounwind {
- store i33 %x, i33* @i33_s
+ store i33 %x, ptr @i33_s
ret void
}
define void @i34_ls(i34 %x) nounwind {
- store i34 %x, i34* @i34_s
+ store i34 %x, ptr @i34_s
ret void
}
define void @i35_ls(i35 %x) nounwind {
- store i35 %x, i35* @i35_s
+ store i35 %x, ptr @i35_s
ret void
}
define void @i36_ls(i36 %x) nounwind {
- store i36 %x, i36* @i36_s
+ store i36 %x, ptr @i36_s
ret void
}
define void @i37_ls(i37 %x) nounwind {
- store i37 %x, i37* @i37_s
+ store i37 %x, ptr @i37_s
ret void
}
define void @i38_ls(i38 %x) nounwind {
- store i38 %x, i38* @i38_s
+ store i38 %x, ptr @i38_s
ret void
}
define void @i39_ls(i39 %x) nounwind {
- store i39 %x, i39* @i39_s
+ store i39 %x, ptr @i39_s
ret void
}
define void @i40_ls(i40 %x) nounwind {
- store i40 %x, i40* @i40_s
+ store i40 %x, ptr @i40_s
ret void
}
define void @i41_ls(i41 %x) nounwind {
- store i41 %x, i41* @i41_s
+ store i41 %x, ptr @i41_s
ret void
}
define void @i42_ls(i42 %x) nounwind {
- store i42 %x, i42* @i42_s
+ store i42 %x, ptr @i42_s
ret void
}
define void @i43_ls(i43 %x) nounwind {
- store i43 %x, i43* @i43_s
+ store i43 %x, ptr @i43_s
ret void
}
define void @i44_ls(i44 %x) nounwind {
- store i44 %x, i44* @i44_s
+ store i44 %x, ptr @i44_s
ret void
}
define void @i45_ls(i45 %x) nounwind {
- store i45 %x, i45* @i45_s
+ store i45 %x, ptr @i45_s
ret void
}
define void @i46_ls(i46 %x) nounwind {
- store i46 %x, i46* @i46_s
+ store i46 %x, ptr @i46_s
ret void
}
define void @i47_ls(i47 %x) nounwind {
- store i47 %x, i47* @i47_s
+ store i47 %x, ptr @i47_s
ret void
}
define void @i48_ls(i48 %x) nounwind {
- store i48 %x, i48* @i48_s
+ store i48 %x, ptr @i48_s
ret void
}
define void @i49_ls(i49 %x) nounwind {
- store i49 %x, i49* @i49_s
+ store i49 %x, ptr @i49_s
ret void
}
define void @i50_ls(i50 %x) nounwind {
- store i50 %x, i50* @i50_s
+ store i50 %x, ptr @i50_s
ret void
}
define void @i51_ls(i51 %x) nounwind {
- store i51 %x, i51* @i51_s
+ store i51 %x, ptr @i51_s
ret void
}
define void @i52_ls(i52 %x) nounwind {
- store i52 %x, i52* @i52_s
+ store i52 %x, ptr @i52_s
ret void
}
define void @i53_ls(i53 %x) nounwind {
- store i53 %x, i53* @i53_s
+ store i53 %x, ptr @i53_s
ret void
}
define void @i54_ls(i54 %x) nounwind {
- store i54 %x, i54* @i54_s
+ store i54 %x, ptr @i54_s
ret void
}
define void @i55_ls(i55 %x) nounwind {
- store i55 %x, i55* @i55_s
+ store i55 %x, ptr @i55_s
ret void
}
define void @i56_ls(i56 %x) nounwind {
- store i56 %x, i56* @i56_s
+ store i56 %x, ptr @i56_s
ret void
}
define void @i57_ls(i57 %x) nounwind {
- store i57 %x, i57* @i57_s
+ store i57 %x, ptr @i57_s
ret void
}
define void @i58_ls(i58 %x) nounwind {
- store i58 %x, i58* @i58_s
+ store i58 %x, ptr @i58_s
ret void
}
define void @i59_ls(i59 %x) nounwind {
- store i59 %x, i59* @i59_s
+ store i59 %x, ptr @i59_s
ret void
}
define void @i60_ls(i60 %x) nounwind {
- store i60 %x, i60* @i60_s
+ store i60 %x, ptr @i60_s
ret void
}
define void @i61_ls(i61 %x) nounwind {
- store i61 %x, i61* @i61_s
+ store i61 %x, ptr @i61_s
ret void
}
define void @i62_ls(i62 %x) nounwind {
- store i62 %x, i62* @i62_s
+ store i62 %x, ptr @i62_s
ret void
}
define void @i63_ls(i63 %x) nounwind {
- store i63 %x, i63* @i63_s
+ store i63 %x, ptr @i63_s
ret void
}
define void @i64_ls(i64 %x) nounwind {
- store i64 %x, i64* @i64_s
+ store i64 %x, ptr @i64_s
ret void
}
; RUN: llc -march=nvptx < %s > %t
; RUN: llc -march=nvptx64 < %s > %t
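; Check that a sign-extended (signext) integer argument of every width from
; i1 through i64 can be received and stored to a global without error.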
-@i1_s = external global i1 ; <i1*> [#uses=1]
-@i2_s = external global i2 ; <i2*> [#uses=1]
-@i3_s = external global i3 ; <i3*> [#uses=1]
-@i4_s = external global i4 ; <i4*> [#uses=1]
-@i5_s = external global i5 ; <i5*> [#uses=1]
-@i6_s = external global i6 ; <i6*> [#uses=1]
-@i7_s = external global i7 ; <i7*> [#uses=1]
-@i8_s = external global i8 ; <i8*> [#uses=1]
-@i9_s = external global i9 ; <i9*> [#uses=1]
-@i10_s = external global i10 ; <i10*> [#uses=1]
-@i11_s = external global i11 ; <i11*> [#uses=1]
-@i12_s = external global i12 ; <i12*> [#uses=1]
-@i13_s = external global i13 ; <i13*> [#uses=1]
-@i14_s = external global i14 ; <i14*> [#uses=1]
-@i15_s = external global i15 ; <i15*> [#uses=1]
-@i16_s = external global i16 ; <i16*> [#uses=1]
-@i17_s = external global i17 ; <i17*> [#uses=1]
-@i18_s = external global i18 ; <i18*> [#uses=1]
-@i19_s = external global i19 ; <i19*> [#uses=1]
-@i20_s = external global i20 ; <i20*> [#uses=1]
-@i21_s = external global i21 ; <i21*> [#uses=1]
-@i22_s = external global i22 ; <i22*> [#uses=1]
-@i23_s = external global i23 ; <i23*> [#uses=1]
-@i24_s = external global i24 ; <i24*> [#uses=1]
-@i25_s = external global i25 ; <i25*> [#uses=1]
-@i26_s = external global i26 ; <i26*> [#uses=1]
-@i27_s = external global i27 ; <i27*> [#uses=1]
-@i28_s = external global i28 ; <i28*> [#uses=1]
-@i29_s = external global i29 ; <i29*> [#uses=1]
-@i30_s = external global i30 ; <i30*> [#uses=1]
-@i31_s = external global i31 ; <i31*> [#uses=1]
-@i32_s = external global i32 ; <i32*> [#uses=1]
-@i33_s = external global i33 ; <i33*> [#uses=1]
-@i34_s = external global i34 ; <i34*> [#uses=1]
-@i35_s = external global i35 ; <i35*> [#uses=1]
-@i36_s = external global i36 ; <i36*> [#uses=1]
-@i37_s = external global i37 ; <i37*> [#uses=1]
-@i38_s = external global i38 ; <i38*> [#uses=1]
-@i39_s = external global i39 ; <i39*> [#uses=1]
-@i40_s = external global i40 ; <i40*> [#uses=1]
-@i41_s = external global i41 ; <i41*> [#uses=1]
-@i42_s = external global i42 ; <i42*> [#uses=1]
-@i43_s = external global i43 ; <i43*> [#uses=1]
-@i44_s = external global i44 ; <i44*> [#uses=1]
-@i45_s = external global i45 ; <i45*> [#uses=1]
-@i46_s = external global i46 ; <i46*> [#uses=1]
-@i47_s = external global i47 ; <i47*> [#uses=1]
-@i48_s = external global i48 ; <i48*> [#uses=1]
-@i49_s = external global i49 ; <i49*> [#uses=1]
-@i50_s = external global i50 ; <i50*> [#uses=1]
-@i51_s = external global i51 ; <i51*> [#uses=1]
-@i52_s = external global i52 ; <i52*> [#uses=1]
-@i53_s = external global i53 ; <i53*> [#uses=1]
-@i54_s = external global i54 ; <i54*> [#uses=1]
-@i55_s = external global i55 ; <i55*> [#uses=1]
-@i56_s = external global i56 ; <i56*> [#uses=1]
-@i57_s = external global i57 ; <i57*> [#uses=1]
-@i58_s = external global i58 ; <i58*> [#uses=1]
-@i59_s = external global i59 ; <i59*> [#uses=1]
-@i60_s = external global i60 ; <i60*> [#uses=1]
-@i61_s = external global i61 ; <i61*> [#uses=1]
-@i62_s = external global i62 ; <i62*> [#uses=1]
-@i63_s = external global i63 ; <i63*> [#uses=1]
-@i64_s = external global i64 ; <i64*> [#uses=1]
+@i1_s = external global i1 ; <ptr> [#uses=1]
+@i2_s = external global i2 ; <ptr> [#uses=1]
+@i3_s = external global i3 ; <ptr> [#uses=1]
+@i4_s = external global i4 ; <ptr> [#uses=1]
+@i5_s = external global i5 ; <ptr> [#uses=1]
+@i6_s = external global i6 ; <ptr> [#uses=1]
+@i7_s = external global i7 ; <ptr> [#uses=1]
+@i8_s = external global i8 ; <ptr> [#uses=1]
+@i9_s = external global i9 ; <ptr> [#uses=1]
+@i10_s = external global i10 ; <ptr> [#uses=1]
+@i11_s = external global i11 ; <ptr> [#uses=1]
+@i12_s = external global i12 ; <ptr> [#uses=1]
+@i13_s = external global i13 ; <ptr> [#uses=1]
+@i14_s = external global i14 ; <ptr> [#uses=1]
+@i15_s = external global i15 ; <ptr> [#uses=1]
+@i16_s = external global i16 ; <ptr> [#uses=1]
+@i17_s = external global i17 ; <ptr> [#uses=1]
+@i18_s = external global i18 ; <ptr> [#uses=1]
+@i19_s = external global i19 ; <ptr> [#uses=1]
+@i20_s = external global i20 ; <ptr> [#uses=1]
+@i21_s = external global i21 ; <ptr> [#uses=1]
+@i22_s = external global i22 ; <ptr> [#uses=1]
+@i23_s = external global i23 ; <ptr> [#uses=1]
+@i24_s = external global i24 ; <ptr> [#uses=1]
+@i25_s = external global i25 ; <ptr> [#uses=1]
+@i26_s = external global i26 ; <ptr> [#uses=1]
+@i27_s = external global i27 ; <ptr> [#uses=1]
+@i28_s = external global i28 ; <ptr> [#uses=1]
+@i29_s = external global i29 ; <ptr> [#uses=1]
+@i30_s = external global i30 ; <ptr> [#uses=1]
+@i31_s = external global i31 ; <ptr> [#uses=1]
+@i32_s = external global i32 ; <ptr> [#uses=1]
+@i33_s = external global i33 ; <ptr> [#uses=1]
+@i34_s = external global i34 ; <ptr> [#uses=1]
+@i35_s = external global i35 ; <ptr> [#uses=1]
+@i36_s = external global i36 ; <ptr> [#uses=1]
+@i37_s = external global i37 ; <ptr> [#uses=1]
+@i38_s = external global i38 ; <ptr> [#uses=1]
+@i39_s = external global i39 ; <ptr> [#uses=1]
+@i40_s = external global i40 ; <ptr> [#uses=1]
+@i41_s = external global i41 ; <ptr> [#uses=1]
+@i42_s = external global i42 ; <ptr> [#uses=1]
+@i43_s = external global i43 ; <ptr> [#uses=1]
+@i44_s = external global i44 ; <ptr> [#uses=1]
+@i45_s = external global i45 ; <ptr> [#uses=1]
+@i46_s = external global i46 ; <ptr> [#uses=1]
+@i47_s = external global i47 ; <ptr> [#uses=1]
+@i48_s = external global i48 ; <ptr> [#uses=1]
+@i49_s = external global i49 ; <ptr> [#uses=1]
+@i50_s = external global i50 ; <ptr> [#uses=1]
+@i51_s = external global i51 ; <ptr> [#uses=1]
+@i52_s = external global i52 ; <ptr> [#uses=1]
+@i53_s = external global i53 ; <ptr> [#uses=1]
+@i54_s = external global i54 ; <ptr> [#uses=1]
+@i55_s = external global i55 ; <ptr> [#uses=1]
+@i56_s = external global i56 ; <ptr> [#uses=1]
+@i57_s = external global i57 ; <ptr> [#uses=1]
+@i58_s = external global i58 ; <ptr> [#uses=1]
+@i59_s = external global i59 ; <ptr> [#uses=1]
+@i60_s = external global i60 ; <ptr> [#uses=1]
+@i61_s = external global i61 ; <ptr> [#uses=1]
+@i62_s = external global i62 ; <ptr> [#uses=1]
+@i63_s = external global i63 ; <ptr> [#uses=1]
+@i64_s = external global i64 ; <ptr> [#uses=1]
define void @i1_ls(i1 signext %x) nounwind {
- store i1 %x, i1* @i1_s
+ store i1 %x, ptr @i1_s
ret void
}
define void @i2_ls(i2 signext %x) nounwind {
- store i2 %x, i2* @i2_s
+ store i2 %x, ptr @i2_s
ret void
}
define void @i3_ls(i3 signext %x) nounwind {
- store i3 %x, i3* @i3_s
+ store i3 %x, ptr @i3_s
ret void
}
define void @i4_ls(i4 signext %x) nounwind {
- store i4 %x, i4* @i4_s
+ store i4 %x, ptr @i4_s
ret void
}
define void @i5_ls(i5 signext %x) nounwind {
- store i5 %x, i5* @i5_s
+ store i5 %x, ptr @i5_s
ret void
}
define void @i6_ls(i6 signext %x) nounwind {
- store i6 %x, i6* @i6_s
+ store i6 %x, ptr @i6_s
ret void
}
define void @i7_ls(i7 signext %x) nounwind {
- store i7 %x, i7* @i7_s
+ store i7 %x, ptr @i7_s
ret void
}
define void @i8_ls(i8 signext %x) nounwind {
- store i8 %x, i8* @i8_s
+ store i8 %x, ptr @i8_s
ret void
}
define void @i9_ls(i9 signext %x) nounwind {
- store i9 %x, i9* @i9_s
+ store i9 %x, ptr @i9_s
ret void
}
define void @i10_ls(i10 signext %x) nounwind {
- store i10 %x, i10* @i10_s
+ store i10 %x, ptr @i10_s
ret void
}
define void @i11_ls(i11 signext %x) nounwind {
- store i11 %x, i11* @i11_s
+ store i11 %x, ptr @i11_s
ret void
}
define void @i12_ls(i12 signext %x) nounwind {
- store i12 %x, i12* @i12_s
+ store i12 %x, ptr @i12_s
ret void
}
define void @i13_ls(i13 signext %x) nounwind {
- store i13 %x, i13* @i13_s
+ store i13 %x, ptr @i13_s
ret void
}
define void @i14_ls(i14 signext %x) nounwind {
- store i14 %x, i14* @i14_s
+ store i14 %x, ptr @i14_s
ret void
}
define void @i15_ls(i15 signext %x) nounwind {
- store i15 %x, i15* @i15_s
+ store i15 %x, ptr @i15_s
ret void
}
define void @i16_ls(i16 signext %x) nounwind {
- store i16 %x, i16* @i16_s
+ store i16 %x, ptr @i16_s
ret void
}
define void @i17_ls(i17 signext %x) nounwind {
- store i17 %x, i17* @i17_s
+ store i17 %x, ptr @i17_s
ret void
}
define void @i18_ls(i18 signext %x) nounwind {
- store i18 %x, i18* @i18_s
+ store i18 %x, ptr @i18_s
ret void
}
define void @i19_ls(i19 signext %x) nounwind {
- store i19 %x, i19* @i19_s
+ store i19 %x, ptr @i19_s
ret void
}
define void @i20_ls(i20 signext %x) nounwind {
- store i20 %x, i20* @i20_s
+ store i20 %x, ptr @i20_s
ret void
}
define void @i21_ls(i21 signext %x) nounwind {
- store i21 %x, i21* @i21_s
+ store i21 %x, ptr @i21_s
ret void
}
define void @i22_ls(i22 signext %x) nounwind {
- store i22 %x, i22* @i22_s
+ store i22 %x, ptr @i22_s
ret void
}
define void @i23_ls(i23 signext %x) nounwind {
- store i23 %x, i23* @i23_s
+ store i23 %x, ptr @i23_s
ret void
}
define void @i24_ls(i24 signext %x) nounwind {
- store i24 %x, i24* @i24_s
+ store i24 %x, ptr @i24_s
ret void
}
define void @i25_ls(i25 signext %x) nounwind {
- store i25 %x, i25* @i25_s
+ store i25 %x, ptr @i25_s
ret void
}
define void @i26_ls(i26 signext %x) nounwind {
- store i26 %x, i26* @i26_s
+ store i26 %x, ptr @i26_s
ret void
}
define void @i27_ls(i27 signext %x) nounwind {
- store i27 %x, i27* @i27_s
+ store i27 %x, ptr @i27_s
ret void
}
define void @i28_ls(i28 signext %x) nounwind {
- store i28 %x, i28* @i28_s
+ store i28 %x, ptr @i28_s
ret void
}
define void @i29_ls(i29 signext %x) nounwind {
- store i29 %x, i29* @i29_s
+ store i29 %x, ptr @i29_s
ret void
}
define void @i30_ls(i30 signext %x) nounwind {
- store i30 %x, i30* @i30_s
+ store i30 %x, ptr @i30_s
ret void
}
define void @i31_ls(i31 signext %x) nounwind {
- store i31 %x, i31* @i31_s
+ store i31 %x, ptr @i31_s
ret void
}
define void @i32_ls(i32 signext %x) nounwind {
- store i32 %x, i32* @i32_s
+ store i32 %x, ptr @i32_s
ret void
}
define void @i33_ls(i33 signext %x) nounwind {
- store i33 %x, i33* @i33_s
+ store i33 %x, ptr @i33_s
ret void
}
define void @i34_ls(i34 signext %x) nounwind {
- store i34 %x, i34* @i34_s
+ store i34 %x, ptr @i34_s
ret void
}
define void @i35_ls(i35 signext %x) nounwind {
- store i35 %x, i35* @i35_s
+ store i35 %x, ptr @i35_s
ret void
}
define void @i36_ls(i36 signext %x) nounwind {
- store i36 %x, i36* @i36_s
+ store i36 %x, ptr @i36_s
ret void
}
define void @i37_ls(i37 signext %x) nounwind {
- store i37 %x, i37* @i37_s
+ store i37 %x, ptr @i37_s
ret void
}
define void @i38_ls(i38 signext %x) nounwind {
- store i38 %x, i38* @i38_s
+ store i38 %x, ptr @i38_s
ret void
}
define void @i39_ls(i39 signext %x) nounwind {
- store i39 %x, i39* @i39_s
+ store i39 %x, ptr @i39_s
ret void
}
define void @i40_ls(i40 signext %x) nounwind {
- store i40 %x, i40* @i40_s
+ store i40 %x, ptr @i40_s
ret void
}
define void @i41_ls(i41 signext %x) nounwind {
- store i41 %x, i41* @i41_s
+ store i41 %x, ptr @i41_s
ret void
}
define void @i42_ls(i42 signext %x) nounwind {
- store i42 %x, i42* @i42_s
+ store i42 %x, ptr @i42_s
ret void
}
define void @i43_ls(i43 signext %x) nounwind {
- store i43 %x, i43* @i43_s
+ store i43 %x, ptr @i43_s
ret void
}
define void @i44_ls(i44 signext %x) nounwind {
- store i44 %x, i44* @i44_s
+ store i44 %x, ptr @i44_s
ret void
}
define void @i45_ls(i45 signext %x) nounwind {
- store i45 %x, i45* @i45_s
+ store i45 %x, ptr @i45_s
ret void
}
define void @i46_ls(i46 signext %x) nounwind {
- store i46 %x, i46* @i46_s
+ store i46 %x, ptr @i46_s
ret void
}
define void @i47_ls(i47 signext %x) nounwind {
- store i47 %x, i47* @i47_s
+ store i47 %x, ptr @i47_s
ret void
}
define void @i48_ls(i48 signext %x) nounwind {
- store i48 %x, i48* @i48_s
+ store i48 %x, ptr @i48_s
ret void
}
define void @i49_ls(i49 signext %x) nounwind {
- store i49 %x, i49* @i49_s
+ store i49 %x, ptr @i49_s
ret void
}
define void @i50_ls(i50 signext %x) nounwind {
- store i50 %x, i50* @i50_s
+ store i50 %x, ptr @i50_s
ret void
}
define void @i51_ls(i51 signext %x) nounwind {
- store i51 %x, i51* @i51_s
+ store i51 %x, ptr @i51_s
ret void
}
define void @i52_ls(i52 signext %x) nounwind {
- store i52 %x, i52* @i52_s
+ store i52 %x, ptr @i52_s
ret void
}
define void @i53_ls(i53 signext %x) nounwind {
- store i53 %x, i53* @i53_s
+ store i53 %x, ptr @i53_s
ret void
}
define void @i54_ls(i54 signext %x) nounwind {
- store i54 %x, i54* @i54_s
+ store i54 %x, ptr @i54_s
ret void
}
define void @i55_ls(i55 signext %x) nounwind {
- store i55 %x, i55* @i55_s
+ store i55 %x, ptr @i55_s
ret void
}
define void @i56_ls(i56 signext %x) nounwind {
- store i56 %x, i56* @i56_s
+ store i56 %x, ptr @i56_s
ret void
}
define void @i57_ls(i57 signext %x) nounwind {
- store i57 %x, i57* @i57_s
+ store i57 %x, ptr @i57_s
ret void
}
define void @i58_ls(i58 signext %x) nounwind {
- store i58 %x, i58* @i58_s
+ store i58 %x, ptr @i58_s
ret void
}
define void @i59_ls(i59 signext %x) nounwind {
- store i59 %x, i59* @i59_s
+ store i59 %x, ptr @i59_s
ret void
}
define void @i60_ls(i60 signext %x) nounwind {
- store i60 %x, i60* @i60_s
+ store i60 %x, ptr @i60_s
ret void
}
define void @i61_ls(i61 signext %x) nounwind {
- store i61 %x, i61* @i61_s
+ store i61 %x, ptr @i61_s
ret void
}
define void @i62_ls(i62 signext %x) nounwind {
- store i62 %x, i62* @i62_s
+ store i62 %x, ptr @i62_s
ret void
}
define void @i63_ls(i63 signext %x) nounwind {
- store i63 %x, i63* @i63_s
+ store i63 %x, ptr @i63_s
ret void
}
define void @i64_ls(i64 signext %x) nounwind {
- store i64 %x, i64* @i64_s
+ store i64 %x, ptr @i64_s
ret void
}
; RUN: llc -march=nvptx < %s > %t
; RUN: llc -march=nvptx64 < %s > %t
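; Check that a zero-extended (zeroext) integer argument of every width from
; i1 through i64 can be received and stored to a global without error.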
-@i1_s = external global i1 ; <i1*> [#uses=1]
-@i2_s = external global i2 ; <i2*> [#uses=1]
-@i3_s = external global i3 ; <i3*> [#uses=1]
-@i4_s = external global i4 ; <i4*> [#uses=1]
-@i5_s = external global i5 ; <i5*> [#uses=1]
-@i6_s = external global i6 ; <i6*> [#uses=1]
-@i7_s = external global i7 ; <i7*> [#uses=1]
-@i8_s = external global i8 ; <i8*> [#uses=1]
-@i9_s = external global i9 ; <i9*> [#uses=1]
-@i10_s = external global i10 ; <i10*> [#uses=1]
-@i11_s = external global i11 ; <i11*> [#uses=1]
-@i12_s = external global i12 ; <i12*> [#uses=1]
-@i13_s = external global i13 ; <i13*> [#uses=1]
-@i14_s = external global i14 ; <i14*> [#uses=1]
-@i15_s = external global i15 ; <i15*> [#uses=1]
-@i16_s = external global i16 ; <i16*> [#uses=1]
-@i17_s = external global i17 ; <i17*> [#uses=1]
-@i18_s = external global i18 ; <i18*> [#uses=1]
-@i19_s = external global i19 ; <i19*> [#uses=1]
-@i20_s = external global i20 ; <i20*> [#uses=1]
-@i21_s = external global i21 ; <i21*> [#uses=1]
-@i22_s = external global i22 ; <i22*> [#uses=1]
-@i23_s = external global i23 ; <i23*> [#uses=1]
-@i24_s = external global i24 ; <i24*> [#uses=1]
-@i25_s = external global i25 ; <i25*> [#uses=1]
-@i26_s = external global i26 ; <i26*> [#uses=1]
-@i27_s = external global i27 ; <i27*> [#uses=1]
-@i28_s = external global i28 ; <i28*> [#uses=1]
-@i29_s = external global i29 ; <i29*> [#uses=1]
-@i30_s = external global i30 ; <i30*> [#uses=1]
-@i31_s = external global i31 ; <i31*> [#uses=1]
-@i32_s = external global i32 ; <i32*> [#uses=1]
-@i33_s = external global i33 ; <i33*> [#uses=1]
-@i34_s = external global i34 ; <i34*> [#uses=1]
-@i35_s = external global i35 ; <i35*> [#uses=1]
-@i36_s = external global i36 ; <i36*> [#uses=1]
-@i37_s = external global i37 ; <i37*> [#uses=1]
-@i38_s = external global i38 ; <i38*> [#uses=1]
-@i39_s = external global i39 ; <i39*> [#uses=1]
-@i40_s = external global i40 ; <i40*> [#uses=1]
-@i41_s = external global i41 ; <i41*> [#uses=1]
-@i42_s = external global i42 ; <i42*> [#uses=1]
-@i43_s = external global i43 ; <i43*> [#uses=1]
-@i44_s = external global i44 ; <i44*> [#uses=1]
-@i45_s = external global i45 ; <i45*> [#uses=1]
-@i46_s = external global i46 ; <i46*> [#uses=1]
-@i47_s = external global i47 ; <i47*> [#uses=1]
-@i48_s = external global i48 ; <i48*> [#uses=1]
-@i49_s = external global i49 ; <i49*> [#uses=1]
-@i50_s = external global i50 ; <i50*> [#uses=1]
-@i51_s = external global i51 ; <i51*> [#uses=1]
-@i52_s = external global i52 ; <i52*> [#uses=1]
-@i53_s = external global i53 ; <i53*> [#uses=1]
-@i54_s = external global i54 ; <i54*> [#uses=1]
-@i55_s = external global i55 ; <i55*> [#uses=1]
-@i56_s = external global i56 ; <i56*> [#uses=1]
-@i57_s = external global i57 ; <i57*> [#uses=1]
-@i58_s = external global i58 ; <i58*> [#uses=1]
-@i59_s = external global i59 ; <i59*> [#uses=1]
-@i60_s = external global i60 ; <i60*> [#uses=1]
-@i61_s = external global i61 ; <i61*> [#uses=1]
-@i62_s = external global i62 ; <i62*> [#uses=1]
-@i63_s = external global i63 ; <i63*> [#uses=1]
-@i64_s = external global i64 ; <i64*> [#uses=1]
+@i1_s = external global i1 ; <ptr> [#uses=1]
+@i2_s = external global i2 ; <ptr> [#uses=1]
+@i3_s = external global i3 ; <ptr> [#uses=1]
+@i4_s = external global i4 ; <ptr> [#uses=1]
+@i5_s = external global i5 ; <ptr> [#uses=1]
+@i6_s = external global i6 ; <ptr> [#uses=1]
+@i7_s = external global i7 ; <ptr> [#uses=1]
+@i8_s = external global i8 ; <ptr> [#uses=1]
+@i9_s = external global i9 ; <ptr> [#uses=1]
+@i10_s = external global i10 ; <ptr> [#uses=1]
+@i11_s = external global i11 ; <ptr> [#uses=1]
+@i12_s = external global i12 ; <ptr> [#uses=1]
+@i13_s = external global i13 ; <ptr> [#uses=1]
+@i14_s = external global i14 ; <ptr> [#uses=1]
+@i15_s = external global i15 ; <ptr> [#uses=1]
+@i16_s = external global i16 ; <ptr> [#uses=1]
+@i17_s = external global i17 ; <ptr> [#uses=1]
+@i18_s = external global i18 ; <ptr> [#uses=1]
+@i19_s = external global i19 ; <ptr> [#uses=1]
+@i20_s = external global i20 ; <ptr> [#uses=1]
+@i21_s = external global i21 ; <ptr> [#uses=1]
+@i22_s = external global i22 ; <ptr> [#uses=1]
+@i23_s = external global i23 ; <ptr> [#uses=1]
+@i24_s = external global i24 ; <ptr> [#uses=1]
+@i25_s = external global i25 ; <ptr> [#uses=1]
+@i26_s = external global i26 ; <ptr> [#uses=1]
+@i27_s = external global i27 ; <ptr> [#uses=1]
+@i28_s = external global i28 ; <ptr> [#uses=1]
+@i29_s = external global i29 ; <ptr> [#uses=1]
+@i30_s = external global i30 ; <ptr> [#uses=1]
+@i31_s = external global i31 ; <ptr> [#uses=1]
+@i32_s = external global i32 ; <ptr> [#uses=1]
+@i33_s = external global i33 ; <ptr> [#uses=1]
+@i34_s = external global i34 ; <ptr> [#uses=1]
+@i35_s = external global i35 ; <ptr> [#uses=1]
+@i36_s = external global i36 ; <ptr> [#uses=1]
+@i37_s = external global i37 ; <ptr> [#uses=1]
+@i38_s = external global i38 ; <ptr> [#uses=1]
+@i39_s = external global i39 ; <ptr> [#uses=1]
+@i40_s = external global i40 ; <ptr> [#uses=1]
+@i41_s = external global i41 ; <ptr> [#uses=1]
+@i42_s = external global i42 ; <ptr> [#uses=1]
+@i43_s = external global i43 ; <ptr> [#uses=1]
+@i44_s = external global i44 ; <ptr> [#uses=1]
+@i45_s = external global i45 ; <ptr> [#uses=1]
+@i46_s = external global i46 ; <ptr> [#uses=1]
+@i47_s = external global i47 ; <ptr> [#uses=1]
+@i48_s = external global i48 ; <ptr> [#uses=1]
+@i49_s = external global i49 ; <ptr> [#uses=1]
+@i50_s = external global i50 ; <ptr> [#uses=1]
+@i51_s = external global i51 ; <ptr> [#uses=1]
+@i52_s = external global i52 ; <ptr> [#uses=1]
+@i53_s = external global i53 ; <ptr> [#uses=1]
+@i54_s = external global i54 ; <ptr> [#uses=1]
+@i55_s = external global i55 ; <ptr> [#uses=1]
+@i56_s = external global i56 ; <ptr> [#uses=1]
+@i57_s = external global i57 ; <ptr> [#uses=1]
+@i58_s = external global i58 ; <ptr> [#uses=1]
+@i59_s = external global i59 ; <ptr> [#uses=1]
+@i60_s = external global i60 ; <ptr> [#uses=1]
+@i61_s = external global i61 ; <ptr> [#uses=1]
+@i62_s = external global i62 ; <ptr> [#uses=1]
+@i63_s = external global i63 ; <ptr> [#uses=1]
+@i64_s = external global i64 ; <ptr> [#uses=1]
define void @i1_ls(i1 zeroext %x) nounwind {
- store i1 %x, i1* @i1_s
+ store i1 %x, ptr @i1_s
ret void
}
define void @i2_ls(i2 zeroext %x) nounwind {
- store i2 %x, i2* @i2_s
+ store i2 %x, ptr @i2_s
ret void
}
define void @i3_ls(i3 zeroext %x) nounwind {
- store i3 %x, i3* @i3_s
+ store i3 %x, ptr @i3_s
ret void
}
define void @i4_ls(i4 zeroext %x) nounwind {
- store i4 %x, i4* @i4_s
+ store i4 %x, ptr @i4_s
ret void
}
define void @i5_ls(i5 zeroext %x) nounwind {
- store i5 %x, i5* @i5_s
+ store i5 %x, ptr @i5_s
ret void
}
define void @i6_ls(i6 zeroext %x) nounwind {
- store i6 %x, i6* @i6_s
+ store i6 %x, ptr @i6_s
ret void
}
define void @i7_ls(i7 zeroext %x) nounwind {
- store i7 %x, i7* @i7_s
+ store i7 %x, ptr @i7_s
ret void
}
define void @i8_ls(i8 zeroext %x) nounwind {
- store i8 %x, i8* @i8_s
+ store i8 %x, ptr @i8_s
ret void
}
define void @i9_ls(i9 zeroext %x) nounwind {
- store i9 %x, i9* @i9_s
+ store i9 %x, ptr @i9_s
ret void
}
define void @i10_ls(i10 zeroext %x) nounwind {
- store i10 %x, i10* @i10_s
+ store i10 %x, ptr @i10_s
ret void
}
define void @i11_ls(i11 zeroext %x) nounwind {
- store i11 %x, i11* @i11_s
+ store i11 %x, ptr @i11_s
ret void
}
define void @i12_ls(i12 zeroext %x) nounwind {
- store i12 %x, i12* @i12_s
+ store i12 %x, ptr @i12_s
ret void
}
define void @i13_ls(i13 zeroext %x) nounwind {
- store i13 %x, i13* @i13_s
+ store i13 %x, ptr @i13_s
ret void
}
define void @i14_ls(i14 zeroext %x) nounwind {
- store i14 %x, i14* @i14_s
+ store i14 %x, ptr @i14_s
ret void
}
define void @i15_ls(i15 zeroext %x) nounwind {
- store i15 %x, i15* @i15_s
+ store i15 %x, ptr @i15_s
ret void
}
define void @i16_ls(i16 zeroext %x) nounwind {
- store i16 %x, i16* @i16_s
+ store i16 %x, ptr @i16_s
ret void
}
define void @i17_ls(i17 zeroext %x) nounwind {
- store i17 %x, i17* @i17_s
+ store i17 %x, ptr @i17_s
ret void
}
define void @i18_ls(i18 zeroext %x) nounwind {
- store i18 %x, i18* @i18_s
+ store i18 %x, ptr @i18_s
ret void
}
define void @i19_ls(i19 zeroext %x) nounwind {
- store i19 %x, i19* @i19_s
+ store i19 %x, ptr @i19_s
ret void
}
define void @i20_ls(i20 zeroext %x) nounwind {
- store i20 %x, i20* @i20_s
+ store i20 %x, ptr @i20_s
ret void
}
define void @i21_ls(i21 zeroext %x) nounwind {
- store i21 %x, i21* @i21_s
+ store i21 %x, ptr @i21_s
ret void
}
define void @i22_ls(i22 zeroext %x) nounwind {
- store i22 %x, i22* @i22_s
+ store i22 %x, ptr @i22_s
ret void
}
define void @i23_ls(i23 zeroext %x) nounwind {
- store i23 %x, i23* @i23_s
+ store i23 %x, ptr @i23_s
ret void
}
define void @i24_ls(i24 zeroext %x) nounwind {
- store i24 %x, i24* @i24_s
+ store i24 %x, ptr @i24_s
ret void
}
define void @i25_ls(i25 zeroext %x) nounwind {
- store i25 %x, i25* @i25_s
+ store i25 %x, ptr @i25_s
ret void
}
define void @i26_ls(i26 zeroext %x) nounwind {
- store i26 %x, i26* @i26_s
+ store i26 %x, ptr @i26_s
ret void
}
define void @i27_ls(i27 zeroext %x) nounwind {
- store i27 %x, i27* @i27_s
+ store i27 %x, ptr @i27_s
ret void
}
define void @i28_ls(i28 zeroext %x) nounwind {
- store i28 %x, i28* @i28_s
+ store i28 %x, ptr @i28_s
ret void
}
define void @i29_ls(i29 zeroext %x) nounwind {
- store i29 %x, i29* @i29_s
+ store i29 %x, ptr @i29_s
ret void
}
define void @i30_ls(i30 zeroext %x) nounwind {
- store i30 %x, i30* @i30_s
+ store i30 %x, ptr @i30_s
ret void
}
define void @i31_ls(i31 zeroext %x) nounwind {
- store i31 %x, i31* @i31_s
+ store i31 %x, ptr @i31_s
ret void
}
define void @i32_ls(i32 zeroext %x) nounwind {
- store i32 %x, i32* @i32_s
+ store i32 %x, ptr @i32_s
ret void
}
define void @i33_ls(i33 zeroext %x) nounwind {
- store i33 %x, i33* @i33_s
+ store i33 %x, ptr @i33_s
ret void
}
define void @i34_ls(i34 zeroext %x) nounwind {
- store i34 %x, i34* @i34_s
+ store i34 %x, ptr @i34_s
ret void
}
define void @i35_ls(i35 zeroext %x) nounwind {
- store i35 %x, i35* @i35_s
+ store i35 %x, ptr @i35_s
ret void
}
define void @i36_ls(i36 zeroext %x) nounwind {
- store i36 %x, i36* @i36_s
+ store i36 %x, ptr @i36_s
ret void
}
define void @i37_ls(i37 zeroext %x) nounwind {
- store i37 %x, i37* @i37_s
+ store i37 %x, ptr @i37_s
ret void
}
define void @i38_ls(i38 zeroext %x) nounwind {
- store i38 %x, i38* @i38_s
+ store i38 %x, ptr @i38_s
ret void
}
define void @i39_ls(i39 zeroext %x) nounwind {
- store i39 %x, i39* @i39_s
+ store i39 %x, ptr @i39_s
ret void
}
define void @i40_ls(i40 zeroext %x) nounwind {
- store i40 %x, i40* @i40_s
+ store i40 %x, ptr @i40_s
ret void
}
define void @i41_ls(i41 zeroext %x) nounwind {
- store i41 %x, i41* @i41_s
+ store i41 %x, ptr @i41_s
ret void
}
define void @i42_ls(i42 zeroext %x) nounwind {
- store i42 %x, i42* @i42_s
+ store i42 %x, ptr @i42_s
ret void
}
define void @i43_ls(i43 zeroext %x) nounwind {
- store i43 %x, i43* @i43_s
+ store i43 %x, ptr @i43_s
ret void
}
define void @i44_ls(i44 zeroext %x) nounwind {
- store i44 %x, i44* @i44_s
+ store i44 %x, ptr @i44_s
ret void
}
define void @i45_ls(i45 zeroext %x) nounwind {
- store i45 %x, i45* @i45_s
+ store i45 %x, ptr @i45_s
ret void
}
define void @i46_ls(i46 zeroext %x) nounwind {
- store i46 %x, i46* @i46_s
+ store i46 %x, ptr @i46_s
ret void
}
define void @i47_ls(i47 zeroext %x) nounwind {
- store i47 %x, i47* @i47_s
+ store i47 %x, ptr @i47_s
ret void
}
define void @i48_ls(i48 zeroext %x) nounwind {
- store i48 %x, i48* @i48_s
+ store i48 %x, ptr @i48_s
ret void
}
define void @i49_ls(i49 zeroext %x) nounwind {
- store i49 %x, i49* @i49_s
+ store i49 %x, ptr @i49_s
ret void
}
define void @i50_ls(i50 zeroext %x) nounwind {
- store i50 %x, i50* @i50_s
+ store i50 %x, ptr @i50_s
ret void
}
define void @i51_ls(i51 zeroext %x) nounwind {
- store i51 %x, i51* @i51_s
+ store i51 %x, ptr @i51_s
ret void
}
define void @i52_ls(i52 zeroext %x) nounwind {
- store i52 %x, i52* @i52_s
+ store i52 %x, ptr @i52_s
ret void
}
define void @i53_ls(i53 zeroext %x) nounwind {
- store i53 %x, i53* @i53_s
+ store i53 %x, ptr @i53_s
ret void
}
define void @i54_ls(i54 zeroext %x) nounwind {
- store i54 %x, i54* @i54_s
+ store i54 %x, ptr @i54_s
ret void
}
define void @i55_ls(i55 zeroext %x) nounwind {
- store i55 %x, i55* @i55_s
+ store i55 %x, ptr @i55_s
ret void
}
define void @i56_ls(i56 zeroext %x) nounwind {
- store i56 %x, i56* @i56_s
+ store i56 %x, ptr @i56_s
ret void
}
define void @i57_ls(i57 zeroext %x) nounwind {
- store i57 %x, i57* @i57_s
+ store i57 %x, ptr @i57_s
ret void
}
define void @i58_ls(i58 zeroext %x) nounwind {
- store i58 %x, i58* @i58_s
+ store i58 %x, ptr @i58_s
ret void
}
define void @i59_ls(i59 zeroext %x) nounwind {
- store i59 %x, i59* @i59_s
+ store i59 %x, ptr @i59_s
ret void
}
define void @i60_ls(i60 zeroext %x) nounwind {
- store i60 %x, i60* @i60_s
+ store i60 %x, ptr @i60_s
ret void
}
define void @i61_ls(i61 zeroext %x) nounwind {
- store i61 %x, i61* @i61_s
+ store i61 %x, ptr @i61_s
ret void
}
define void @i62_ls(i62 zeroext %x) nounwind {
- store i62 %x, i62* @i62_s
+ store i62 %x, ptr @i62_s
ret void
}
define void @i63_ls(i63 zeroext %x) nounwind {
- store i63 %x, i63* @i63_s
+ store i63 %x, ptr @i63_s
ret void
}
define void @i64_ls(i64 zeroext %x) nounwind {
- store i64 %x, i64* @i64_s
+ store i64 %x, ptr @i64_s
ret void
}
; ENABLED: ld.v2.{{.}}32
; DISABLED: ld.{{.}}32
; DISABLED: ld.{{.}}32
-define i32 @f(i32* %p) {
- %p.1 = getelementptr i32, i32* %p, i32 1
- %v0 = load i32, i32* %p, align 8
- %v1 = load i32, i32* %p.1, align 4
+define i32 @f(ptr %p) {
+ %p.1 = getelementptr i32, ptr %p, i32 1
+ %v0 = load i32, ptr %p, align 8
+ %v1 = load i32, ptr %p.1, align 4
%sum = add i32 %v0, %v1
ret i32 %sum
}
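; A minimal, hypothetical companion case (not part of the original test): the same
; two-adjacent-load pattern with 64-bit elements, which the load/store vectorizer
; could likewise combine into a single vector load given sufficient alignment.
define i64 @f_i64_sketch(ptr %p) {
  %p.1 = getelementptr i64, ptr %p, i32 1
  %v0 = load i64, ptr %p, align 16
  %v1 = load i64, ptr %p.1, align 8
  %sum = add i64 %v0, %v1
  ret i64 %sum
}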
-define half @fh(half* %p) {
- %p.1 = getelementptr half, half* %p, i32 1
- %p.2 = getelementptr half, half* %p, i32 2
- %p.3 = getelementptr half, half* %p, i32 3
- %p.4 = getelementptr half, half* %p, i32 4
- %v0 = load half, half* %p, align 64
- %v1 = load half, half* %p.1, align 4
- %v2 = load half, half* %p.2, align 4
- %v3 = load half, half* %p.3, align 4
- %v4 = load half, half* %p.4, align 4
+define half @fh(ptr %p) {
+ %p.1 = getelementptr half, ptr %p, i32 1
+ %p.2 = getelementptr half, ptr %p, i32 2
+ %p.3 = getelementptr half, ptr %p, i32 3
+ %p.4 = getelementptr half, ptr %p, i32 4
+ %v0 = load half, ptr %p, align 64
+ %v1 = load half, ptr %p.1, align 4
+ %v2 = load half, ptr %p.2, align 4
+ %v3 = load half, ptr %p.3, align 4
+ %v4 = load half, ptr %p.4, align 4
%sum1 = fadd half %v0, %v1
%sum2 = fadd half %v2, %v3
%sum3 = fadd half %sum1, %sum2
 %sum = fadd half %sum3, %v4
 ret half %sum
}
-define float @ff(float* %p) {
- %p.1 = getelementptr float, float* %p, i32 1
- %p.2 = getelementptr float, float* %p, i32 2
- %p.3 = getelementptr float, float* %p, i32 3
- %p.4 = getelementptr float, float* %p, i32 4
- %v0 = load float, float* %p, align 64
- %v1 = load float, float* %p.1, align 4
- %v2 = load float, float* %p.2, align 4
- %v3 = load float, float* %p.3, align 4
- %v4 = load float, float* %p.4, align 4
+define float @ff(ptr %p) {
+ %p.1 = getelementptr float, ptr %p, i32 1
+ %p.2 = getelementptr float, ptr %p, i32 2
+ %p.3 = getelementptr float, ptr %p, i32 3
+ %p.4 = getelementptr float, ptr %p, i32 4
+ %v0 = load float, ptr %p, align 64
+ %v1 = load float, ptr %p.1, align 4
+ %v2 = load float, ptr %p.2, align 4
+ %v3 = load float, ptr %p.3, align 4
+ %v4 = load float, ptr %p.4, align 4
%sum1 = fadd float %v0, %v1
%sum2 = fadd float %v2, %v3
%sum3 = fadd float %sum1, %sum2
; Load a value, then call a function. Branch, and use the loaded value only on
; one side of the branch. The load shouldn't be sunk beneath the call, because
; the call may modify memory.
-define i32 @f(i32 %x, i32* %ptr, i1 %cond) {
+define i32 @f(i32 %x, ptr %ptr, i1 %cond) {
Start:
; CHECK: ld.u32
- %ptr_val = load i32, i32* %ptr
+ %ptr_val = load i32, ptr %ptr
; CHECK: call.uni
call void @foo()
br i1 %cond, label %L1, label %L2
; Load a value, then syncthreads. Branch, and use the loaded value only on one
; side of the branch. The load shouldn't be sunk beneath the call, because
; syncthreads is modeled as maystore.
-define i32 @f(i32 %x, i32* %ptr, i1 %cond) {
+define i32 @f(i32 %x, ptr %ptr, i1 %cond) {
Start:
; CHECK: ld.u32
- %ptr_val = load i32, i32* %ptr
+ %ptr_val = load i32, ptr %ptr
; CHECK: bar.sync
call void @llvm.nvvm.barrier0()
br i1 %cond, label %L1, label %L2
; CHECK: .func call_syncthreads
; CHECK: bar.sync
; CHECK-NOT: bar.sync
-define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
+define void @call_syncthreads(ptr %a, ptr %b, i1 %cond, i1 %cond2) nounwind {
br i1 %cond, label %L1, label %L2
br i1 %cond2, label %Ret, label %L1
Ret:
ret void
L1:
- store i32 0, i32* %a
+ store i32 0, ptr %a
br label %L42
L2:
- store i32 1, i32* %a
+ store i32 1, ptr %a
br label %L42
L42:
call void @llvm.nvvm.barrier0()
; CHECK: .func call_foo
; CHECK: call
; CHECK: call
-define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
+define void @call_foo(ptr %a, ptr %b, i1 %cond, i1 %cond2) nounwind {
br i1 %cond, label %L1, label %L2
br i1 %cond2, label %Ret, label %L1
Ret:
ret void
L1:
- store i32 0, i32* %a
+ store i32 0, ptr %a
br label %L42
L2:
- store i32 1, i32* %a
+ store i32 1, ptr %a
br label %L42
L42:
call void @foo()
@g = addrspace(1) global i32 42
@ga = addrspace(1) global [4 x i8] c"\00\01\02\03"
-@g2 = addrspace(1) global i32* addrspacecast (i32 addrspace(1)* @g to i32*)
-@g3 = addrspace(1) global i32 addrspace(1)* @g
-@g4 = constant {i32*, i32*} {i32* null, i32* addrspacecast (i32 addrspace(1)* @g to i32*)}
-@g5 = constant {i32*, i32*} {i32* null, i32* addrspacecast (i32 addrspace(1)* getelementptr (i32, i32 addrspace(1)* @g, i32 2) to i32*)}
+@g2 = addrspace(1) global ptr addrspacecast (ptr addrspace(1) @g to ptr)
+@g3 = addrspace(1) global ptr addrspace(1) @g
+@g4 = constant {ptr, ptr} {ptr null, ptr addrspacecast (ptr addrspace(1) @g to ptr)}
+@g5 = constant {ptr, ptr} {ptr null, ptr addrspacecast (ptr addrspace(1) getelementptr (i32, ptr addrspace(1) @g, i32 2) to ptr)}
; CHECK: .visible .global .align 4 .u32 g6 = generic(ga)+2;
-@g6 = addrspace(1) global i8* getelementptr inbounds (
- [4 x i8], [4 x i8]* addrspacecast ([4 x i8] addrspace(1)* @ga to [4 x i8]*),
+@g6 = addrspace(1) global ptr getelementptr inbounds (
+ [4 x i8], ptr addrspacecast (ptr addrspace(1) @ga to ptr),
i32 0, i32 2
)
; CHECK: .visible .global .align 4 .u32 g7 = generic(g);
-@g7 = addrspace(1) global i8* addrspacecast (
- i8 addrspace(1)* bitcast (i32 addrspace(1)* @g to i8 addrspace(1)*)
- to i8*
+@g7 = addrspace(1) global ptr addrspacecast (
+ ptr addrspace(1) @g
+ to ptr
)
; CHECK: .visible .global .align 4 .u32 g8[2] = {0, g};
-@g8 = addrspace(1) global [2 x i32 addrspace(1)*] [i32 addrspace(1)* null, i32 addrspace(1)* @g]
+@g8 = addrspace(1) global [2 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @g]
; CHECK: .visible .global .align 4 .u32 g9[2] = {0, generic(g)};
-@g9 = addrspace(1) global [2 x i32*] [
- i32* null,
- i32* addrspacecast (i32 addrspace(1)* @g to i32*)
+@g9 = addrspace(1) global [2 x ptr] [
+ ptr null,
+ ptr addrspacecast (ptr addrspace(1) @g to ptr)
]
; CHECK: .visible .global .align 4 .u32 g10[2] = {0, g};
-@g10 = addrspace(1) global [2 x i8 addrspace(1)*] [
- i8 addrspace(1)* null,
- i8 addrspace(1)* bitcast (i32 addrspace(1)* @g to i8 addrspace(1)*)
+@g10 = addrspace(1) global [2 x ptr addrspace(1)] [
+ ptr addrspace(1) null,
+ ptr addrspace(1) @g
]
; CHECK: .visible .global .align 4 .u32 g11[2] = {0, generic(g)};
-@g11 = addrspace(1) global [2 x i8*] [
- i8* null,
- i8* bitcast (i32* addrspacecast (i32 addrspace(1)* @g to i32*) to i8*)
+@g11 = addrspace(1) global [2 x ptr] [
+ ptr null,
+ ptr addrspacecast (ptr addrspace(1) @g to ptr)
]
declare [2 x float] @bara([2 x float] %input)
declare {float, float} @bars({float, float} %input)
-define void @test_v2f32(<2 x float> %input, <2 x float>* %output) {
+define void @test_v2f32(<2 x float> %input, ptr %output) {
; CHECK-LABEL: @test_v2f32
%call = tail call <2 x float> @barv(<2 x float> %input)
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: ld.param.v2.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]]}, [retval0+0];
- store <2 x float> %call, <2 x float>* %output, align 8
+ store <2 x float> %call, ptr %output, align 8
; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[E0]], [[E1]]}
ret void
}
-define void @test_v3f32(<3 x float> %input, <3 x float>* %output) {
+define void @test_v3f32(<3 x float> %input, ptr %output) {
; CHECK-LABEL: @test_v3f32
;
%call = tail call <3 x float> @barv3(<3 x float> %input)
; CHECK-DAG: ld.param.f32 [[E2:%f[0-9]+]], [retval0+8];
; Make sure we don't load more values than we need to.
; CHECK-NOT: ld.param.f32 [[E3:%f[0-9]+]], [retval0+12];
- store <3 x float> %call, <3 x float>* %output, align 8
+ store <3 x float> %call, ptr %output, align 8
; CHECK-DAG: st.f32 [{{%rd[0-9]}}+8],
; -- This is suboptimal. We should do st.v2.f32 instead
; of combining 2xf32 into i64.
ret void
}
-define void @test_a2f32([2 x float] %input, [2 x float]* %output) {
+define void @test_a2f32([2 x float] %input, ptr %output) {
; CHECK-LABEL: @test_a2f32
%call = tail call [2 x float] @bara([2 x float] %input)
; CHECK: .param .align 4 .b8 retval0[8];
; CHECK-DAG: ld.param.f32 [[ELEMA1:%f[0-9]+]], [retval0+0];
; CHECK-DAG: ld.param.f32 [[ELEMA2:%f[0-9]+]], [retval0+4];
- store [2 x float] %call, [2 x float]* %output, align 4
+ store [2 x float] %call, ptr %output, align 4
; CHECK: }
; CHECK-DAG: st.f32 [{{%rd[0-9]+}}], [[ELEMA1]]
; CHECK-DAG: st.f32 [{{%rd[0-9]+}}+4], [[ELEMA2]]
; CHECK: ret
}
-define void @test_s2f32({float, float} %input, {float, float}* %output) {
+define void @test_s2f32({float, float} %input, ptr %output) {
; CHECK-LABEL: @test_s2f32
%call = tail call {float, float} @bars({float, float} %input)
; CHECK: .param .align 4 .b8 retval0[8];
; CHECK-DAG: ld.param.f32 [[ELEMS1:%f[0-9]+]], [retval0+0];
; CHECK-DAG: ld.param.f32 [[ELEMS2:%f[0-9]+]], [retval0+4];
- store {float, float} %call, {float, float}* %output, align 4
+ store {float, float} %call, ptr %output, align 4
; CHECK: }
; CHECK-DAG: st.f32 [{{%rd[0-9]+}}], [[ELEMS1]]
; CHECK-DAG: st.f32 [{{%rd[0-9]+}}+4], [[ELEMS2]]
define i32 @a() { ret i32 0 }
; CHECK: ERROR: Module has aliases
-@b = internal alias i32 (), i32 ()* @a
+@b = internal alias i32 (), ptr @a
; CHECK: .global .surfref surface
; CHECK: .entry kernel_func_maxntid
-define void @kernel_func_maxntid(float* %a) {
+define void @kernel_func_maxntid(ptr %a) {
; CHECK: .maxntid 10, 20, 30
; CHECK: ret
ret void
}
; CHECK: .entry kernel_func_reqntid
-define void @kernel_func_reqntid(float* %a) {
+define void @kernel_func_reqntid(ptr %a) {
; CHECK: .reqntid 11, 22, 33
; CHECK: ret
ret void
}
; CHECK: .entry kernel_func_minctasm
-define void @kernel_func_minctasm(float* %a) {
+define void @kernel_func_minctasm(ptr %a) {
; CHECK: .minnctapersm 42
; CHECK: ret
ret void
!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8, !9, !10}
-!1 = !{void (float*)* @kernel_func_maxntid, !"kernel", i32 1}
-!2 = !{void (float*)* @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30}
+!1 = !{ptr @kernel_func_maxntid, !"kernel", i32 1}
+!2 = !{ptr @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30}
-!3 = !{void (float*)* @kernel_func_reqntid, !"kernel", i32 1}
-!4 = !{void (float*)* @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33}
+!3 = !{ptr @kernel_func_reqntid, !"kernel", i32 1}
+!4 = !{ptr @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33}
-!5 = !{void (float*)* @kernel_func_minctasm, !"kernel", i32 1}
-!6 = !{void (float*)* @kernel_func_minctasm, !"minctasm", i32 42}
+!5 = !{ptr @kernel_func_minctasm, !"kernel", i32 1}
+!6 = !{ptr @kernel_func_minctasm, !"minctasm", i32 42}
-!7 = !{void ()* @kernel_func_maxnreg, !"kernel", i32 1}
-!8 = !{void ()* @kernel_func_maxnreg, !"maxnreg", i32 1234}
+!7 = !{ptr @kernel_func_maxnreg, !"kernel", i32 1}
+!8 = !{ptr @kernel_func_maxnreg, !"maxnreg", i32 1234}
-!9 = !{i64 addrspace(1)* @texture, !"texture", i32 1}
-!10 = !{i64 addrspace(1)* @surface, !"surface", i32 1}
+!9 = !{ptr addrspace(1) @texture, !"texture", i32 1}
+!10 = !{ptr addrspace(1) @surface, !"surface", i32 1}
ret void
}
-declare void @llvm.nvvm.cp.async.mbarrier.arrive(i64* %a)
-declare void @llvm.nvvm.cp.async.mbarrier.arrive.shared(i64 addrspace(3)* %a)
-declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(i64* %a)
-declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(i64 addrspace(3)* %a)
+declare void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a)
+declare void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a)
+declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a)
+declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a)
; CHECK-LABEL: asyncmbarrier
-define void @asyncmbarrier(i64* %a) {
+define void @asyncmbarrier(ptr %a) {
; CHECK_PTX32: cp.async.mbarrier.arrive.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: cp.async.mbarrier.arrive.b64 [%rd{{[0-9]+}}];
- tail call void @llvm.nvvm.cp.async.mbarrier.arrive(i64* %a)
+ tail call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a)
ret void
}
; CHECK-LABEL: asyncmbarriershared
-define void @asyncmbarriershared(i64 addrspace(3)* %a) {
+define void @asyncmbarriershared(ptr addrspace(3) %a) {
; CHECK_PTX32: cp.async.mbarrier.arrive.shared.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: cp.async.mbarrier.arrive.shared.b64 [%rd{{[0-9]+}}];
- tail call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(i64 addrspace(3)* %a)
+ tail call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a)
ret void
}
; CHECK-LABEL: asyncmbarriernoinc
-define void @asyncmbarriernoinc(i64* %a) {
+define void @asyncmbarriernoinc(ptr %a) {
; CHECK_PTX32: cp.async.mbarrier.arrive.noinc.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: cp.async.mbarrier.arrive.noinc.b64 [%rd{{[0-9]+}}];
- tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(i64* %a)
+ tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a)
ret void
}
; CHECK-LABEL: asyncmbarriernoincshared
-define void @asyncmbarriernoincshared(i64 addrspace(3)* %a) {
+define void @asyncmbarriernoincshared(ptr addrspace(3) %a) {
; CHECK_PTX32: cp.async.mbarrier.arrive.noinc.shared.b64 [%r{{[0-9]+}}];
; CHECK_PTX64: cp.async.mbarrier.arrive.noinc.shared.b64 [%rd{{[0-9]+}}];
- tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(i64 addrspace(3)* %a)
+ tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a)
ret void
}
-declare void @llvm.nvvm.cp.async.ca.shared.global.4(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+declare void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b)
; CHECK-LABEL: asynccasharedglobal4i8
-define void @asynccasharedglobal4i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
+define void @asynccasharedglobal4i8(ptr addrspace(3) %a, ptr addrspace(1) %b) {
; CHECK_PTX32: cp.async.ca.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 4;
; CHECK_PTX64: cp.async.ca.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 4;
- tail call void @llvm.nvvm.cp.async.ca.shared.global.4(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+ tail call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b)
ret void
}
-declare void @llvm.nvvm.cp.async.ca.shared.global.8(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+declare void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b)
; CHECK-LABEL: asynccasharedglobal8i8
-define void @asynccasharedglobal8i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
+define void @asynccasharedglobal8i8(ptr addrspace(3) %a, ptr addrspace(1) %b) {
; CHECK_PTX32: cp.async.ca.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 8;
; CHECK_PTX64: cp.async.ca.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 8;
- tail call void @llvm.nvvm.cp.async.ca.shared.global.8(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+ tail call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b)
ret void
}
-declare void @llvm.nvvm.cp.async.ca.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+declare void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
; CHECK-LABEL: asynccasharedglobal16i8
-define void @asynccasharedglobal16i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
+define void @asynccasharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b) {
; CHECK_PTX32: cp.async.ca.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 16;
; CHECK_PTX64: cp.async.ca.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 16;
- tail call void @llvm.nvvm.cp.async.ca.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+ tail call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
ret void
}
-declare void @llvm.nvvm.cp.async.cg.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+declare void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
; CHECK-LABEL: asynccgsharedglobal16i8
-define void @asynccgsharedglobal16i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
+define void @asynccgsharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b) {
; CHECK_PTX32: cp.async.cg.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 16;
; CHECK_PTX64: cp.async.cg.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 16;
- tail call void @llvm.nvvm.cp.async.cg.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+ tail call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
ret void
}
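; A minimal sketch (hypothetical, reusing only the intrinsics declared above): issue a
; 4-byte async copy into shared memory, then arrive on a shared-memory mbarrier.
define void @async_copy_then_arrive_sketch(ptr addrspace(3) %dst, ptr addrspace(1) %src, ptr addrspace(3) %mbar) {
  tail call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %dst, ptr addrspace(1) %src)
  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %mbar)
  ret void
}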
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-unknown-unknown"
-define double @kernel(double addrspace(5)* %ptr, double %val) {
- %res = atomicrmw fadd double addrspace(5)* %ptr, double %val monotonic, align 8
+define double @kernel(ptr addrspace(5) %ptr, double %val) {
+ %res = atomicrmw fadd ptr addrspace(5) %ptr, double %val monotonic, align 8
ret double %res
-; CHECK: %1 = load double, double addrspace(5)* %ptr, align 8
+; CHECK: %1 = load double, ptr addrspace(5) %ptr, align 8
; CHECK-NEXT: %new = fadd double %1, %val
-; CHECK-NEXT: store double %new, double addrspace(5)* %ptr, align 8
+; CHECK-NEXT: store double %new, ptr addrspace(5) %ptr, align 8
; CHECK-NEXT: ret double %1
}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; CHECK-LABEL: .func test(
-define void @test(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
+define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, double %d) {
; CHECK: atom.add.f64
- %r1 = call double @llvm.nvvm.atomic.load.add.f64.p0f64(double* %dp0, double %d)
+ %r1 = call double @llvm.nvvm.atomic.load.add.f64.p0(ptr %dp0, double %d)
; CHECK: atom.global.add.f64
- %r2 = call double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* %dp1, double %d)
+ %r2 = call double @llvm.nvvm.atomic.load.add.f64.p1(ptr addrspace(1) %dp1, double %d)
; CHECK: atom.shared.add.f64
- %ret = call double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* %dp3, double %d)
+ %ret = call double @llvm.nvvm.atomic.load.add.f64.p3(ptr addrspace(3) %dp3, double %d)
ret void
}
; CHECK-LABEL: .func test2(
-define void @test2(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
+define void @test2(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, double %d) {
; CHECK: atom.add.f64
- %r1 = atomicrmw fadd double* %dp0, double %d seq_cst
+ %r1 = atomicrmw fadd ptr %dp0, double %d seq_cst
; CHECK: atom.global.add.f64
- %r2 = atomicrmw fadd double addrspace(1)* %dp1, double %d seq_cst
+ %r2 = atomicrmw fadd ptr addrspace(1) %dp1, double %d seq_cst
; CHECK: atom.shared.add.f64
- %ret = atomicrmw fadd double addrspace(3)* %dp3, double %d seq_cst
+ %ret = atomicrmw fadd ptr addrspace(3) %dp3, double %d seq_cst
ret void
}
-declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double* nocapture, double) #1
-declare double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* nocapture, double) #1
-declare double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p0(ptr nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p1(ptr addrspace(1) nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p3(ptr addrspace(3) nocapture, double) #1
attributes #1 = { argmemonly nounwind }
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; CHECK-LABEL: .func test_atomics_scope(
-define void @test_atomics_scope(float* %fp, float %f,
- double* %dfp, double %df,
- i32* %ip, i32 %i,
- i32* %uip, i32 %ui,
- i64* %llp, i64 %ll) #0 {
+define void @test_atomics_scope(ptr %fp, float %f,
+ ptr %dfp, double %df,
+ ptr %ip, i32 %i,
+ ptr %uip, i32 %ui,
+ ptr %llp, i64 %ll) #0 {
entry:
; CHECK: atom.cta.add.s32
- %tmp36 = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp36 = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.add.u64
- %tmp38 = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp38 = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.add.s32
- %tmp39 = tail call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+ %tmp39 = tail call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.add.u64
- %tmp41 = tail call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp41 = tail call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.add.f32
- %tmp42 = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float %f)
+ %tmp42 = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float %f)
; CHECK: atom.cta.add.f64
- %tmp43 = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double %df)
+ %tmp43 = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double %df)
; CHECK: atom.sys.add.f32
- %tmp44 = tail call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32(float* %fp, float %f)
+ %tmp44 = tail call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0(ptr %fp, float %f)
; CHECK: atom.sys.add.f64
- %tmp45 = tail call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64(double* %dfp, double %df)
+ %tmp45 = tail call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0(ptr %dfp, double %df)
; CHECK: atom.cta.exch.b32
- %tmp46 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp46 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.exch.b64
- %tmp48 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp48 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.exch.b32
- %tmp49 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+ %tmp49 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.exch.b64
- %tmp51 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp51 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.max.s32
- %tmp52 = tail call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp52 = tail call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.max.s64
- %tmp56 = tail call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp56 = tail call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.max.s32
- %tmp58 = tail call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+ %tmp58 = tail call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.max.s64
- %tmp62 = tail call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp62 = tail call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.min.s32
- %tmp64 = tail call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp64 = tail call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.min.s64
- %tmp68 = tail call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp68 = tail call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.min.s32
- %tmp70 = tail call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+ %tmp70 = tail call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.min.s64
- %tmp74 = tail call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp74 = tail call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.inc.u32
- %tmp76 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp76 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.inc.u32
- %tmp77 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+ %tmp77 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.dec.u32
- %tmp78 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp78 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.dec.u32
- %tmp79 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+ %tmp79 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.and.b32
- %tmp80 = tail call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp80 = tail call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.and.b64
- %tmp82 = tail call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp82 = tail call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.and.b32
- %tmp83 = tail call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+ %tmp83 = tail call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.and.b64
- %tmp85 = tail call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp85 = tail call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.or.b32
- %tmp86 = tail call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp86 = tail call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.or.b64
- %tmp88 = tail call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp88 = tail call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.or.b32
- %tmp89 = tail call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+ %tmp89 = tail call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.or.b64
- %tmp91 = tail call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp91 = tail call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.xor.b32
- %tmp92 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp92 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.xor.b64
- %tmp94 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp94 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.sys.xor.b32
- %tmp95 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+ %tmp95 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.sys.xor.b64
- %tmp97 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp97 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.cas.b32
- %tmp98 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 %i)
+ %tmp98 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 %i)
; CHECK: atom.cta.cas.b64
- %tmp100 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll, i64 %ll)
+ %tmp100 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0(ptr %llp, i64 %ll, i64 %ll)
; CHECK: atom.sys.cas.b32
- %tmp101 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32(i32* %ip, i32 %i, i32 %i)
+ %tmp101 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0(ptr %ip, i32 %i, i32 %i)
; CHECK: atom.sys.cas.b64
- %tmp103 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll, i64 %ll)
+ %tmp103 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0(ptr %llp, i64 %ll, i64 %ll)
; CHECK: ret
ret void
; Make sure we use constants as operands to our scoped atomic calls, where appropriate.
; CHECK-LABEL: .func test_atomics_scope_imm(
-define void @test_atomics_scope_imm(float* %fp, float %f,
- double* %dfp, double %df,
- i32* %ip, i32 %i,
- i32* %uip, i32 %ui,
- i64* %llp, i64 %ll) #0 {
+define void @test_atomics_scope_imm(ptr %fp, float %f,
+ ptr %dfp, double %df,
+ ptr %ip, i32 %i,
+ ptr %uip, i32 %ui,
+ ptr %llp, i64 %ll) #0 {
; CHECK: atom.cta.add.s32{{.*}} %r{{[0-9]+}};
- %tmp1r = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+ %tmp1r = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 %i)
; CHECK: atom.cta.add.s32{{.*}}, 1;
- %tmp1i = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 1)
+ %tmp1i = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 1)
; CHECK: atom.cta.add.u64{{.*}}, %rd{{[0-9]+}};
- %tmp2r = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+ %tmp2r = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
; CHECK: atom.cta.add.u64{{.*}}, 2;
- %tmp2i = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 2)
+ %tmp2i = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 2)
; CHECK: atom.cta.add.f32{{.*}}, %f{{[0-9]+}};
- %tmp3r = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float %f)
+ %tmp3r = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float %f)
; CHECK: atom.cta.add.f32{{.*}}, 0f40400000;
- %tmp3i = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float 3.0)
+ %tmp3i = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float 3.0)
; CHECK: atom.cta.add.f64{{.*}}, %fd{{[0-9]+}};
- %tmp4r = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double %df)
+ %tmp4r = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double %df)
; CHECK: atom.cta.add.f64{{.*}}, 0d4010000000000000;
- %tmp4i = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double 4.0)
+ %tmp4i = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double 4.0)
; CAS is implemented separately and has more arguments
; CHECK: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, %r{{[0-9+]}};
- %tmp5rr = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 %i)
+ %tmp5rr = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 %i)
; For some reason in 64-bit mode we end up passing 51 via a register.
; CHECK32: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, 51;
- %tmp5ri = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 51)
+ %tmp5ri = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 51)
; CHECK: atom.cta.cas.b32{{.*}}], 52, %r{{[0-9+]}};
- %tmp5ir = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 52, i32 %i)
+ %tmp5ir = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 52, i32 %i)
; CHECK: atom.cta.cas.b32{{.*}}], 53, 54;
- %tmp5ii = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 53, i32 54)
+ %tmp5ii = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 53, i32 54)
; CHECK: ret
ret void
}
-declare i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* nocapture, float) #1
-declare double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* nocapture, double) #1
-declare float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32(float* nocapture, float) #1
-declare double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64(double* nocapture, double) #1
-declare i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* nocapture, i32, i32) #1
-declare i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64(i64* nocapture, i64, i64) #1
-declare i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32(i32* nocapture, i32, i32) #1
-declare i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64(i64* nocapture, i64, i64) #1
+declare i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr nocapture, float) #1
+declare double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr nocapture, double) #1
+declare float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0(ptr nocapture, float) #1
+declare double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0(ptr nocapture, double) #1
+declare i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr nocapture, i32, i32) #1
+declare i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0(ptr nocapture, i64, i64) #1
+declare i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0(ptr nocapture, i32, i32) #1
+declare i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0(ptr nocapture, i64, i64) #1
attributes #1 = { argmemonly nounwind }
; CHECK-LABEL: atom0
-define i32 @atom0(i32* %addr, i32 %val) {
+define i32 @atom0(ptr %addr, i32 %val) {
; CHECK: atom.add.u32
- %ret = atomicrmw add i32* %addr, i32 %val seq_cst
+ %ret = atomicrmw add ptr %addr, i32 %val seq_cst
ret i32 %ret
}
; CHECK-LABEL: atom1
-define i64 @atom1(i64* %addr, i64 %val) {
+define i64 @atom1(ptr %addr, i64 %val) {
; CHECK: atom.add.u64
- %ret = atomicrmw add i64* %addr, i64 %val seq_cst
+ %ret = atomicrmw add ptr %addr, i64 %val seq_cst
ret i64 %ret
}
; CHECK-LABEL: atom2
-define i32 @atom2(i32* %subr, i32 %val) {
+define i32 @atom2(ptr %subr, i32 %val) {
; CHECK: neg.s32
; CHECK: atom.add.u32
- %ret = atomicrmw sub i32* %subr, i32 %val seq_cst
+ %ret = atomicrmw sub ptr %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK-LABEL: atom3
-define i64 @atom3(i64* %subr, i64 %val) {
+define i64 @atom3(ptr %subr, i64 %val) {
; CHECK: neg.s64
; CHECK: atom.add.u64
- %ret = atomicrmw sub i64* %subr, i64 %val seq_cst
+ %ret = atomicrmw sub ptr %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK-LABEL: atom4
-define i32 @atom4(i32* %subr, i32 %val) {
+define i32 @atom4(ptr %subr, i32 %val) {
; CHECK: atom.and.b32
- %ret = atomicrmw and i32* %subr, i32 %val seq_cst
+ %ret = atomicrmw and ptr %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK-LABEL: atom5
-define i64 @atom5(i64* %subr, i64 %val) {
+define i64 @atom5(ptr %subr, i64 %val) {
; CHECK: atom.and.b64
- %ret = atomicrmw and i64* %subr, i64 %val seq_cst
+ %ret = atomicrmw and ptr %subr, i64 %val seq_cst
ret i64 %ret
}
;; NAND not yet supported
-;define i32 @atom6(i32* %subr, i32 %val) {
-; %ret = atomicrmw nand i32* %subr, i32 %val seq_cst
+;define i32 @atom6(ptr %subr, i32 %val) {
+; %ret = atomicrmw nand ptr %subr, i32 %val seq_cst
; ret i32 %ret
;}
-;define i64 @atom7(i64* %subr, i64 %val) {
-; %ret = atomicrmw nand i64* %subr, i64 %val seq_cst
+;define i64 @atom7(ptr %subr, i64 %val) {
+; %ret = atomicrmw nand ptr %subr, i64 %val seq_cst
; ret i64 %ret
;}
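; Since NAND is rejected above, here is a minimal sketch (hypothetical, not part of the
; test) of the usual compare-and-swap loop that could emulate it until it is supported:
define i32 @atom_nand_sketch(ptr %addr, i32 %val) {
entry:
  %init = load i32, ptr %addr, align 4
  br label %loop
loop:
  %old = phi i32 [ %init, %entry ], [ %loaded, %loop ]
  %and = and i32 %old, %val
  %nand = xor i32 %and, -1
  %pair = cmpxchg ptr %addr, i32 %old, i32 %nand seq_cst seq_cst
  %loaded = extractvalue { i32, i1 } %pair, 0
  %ok = extractvalue { i32, i1 } %pair, 1
  br i1 %ok, label %done, label %loop
done:
  ret i32 %loaded
}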
; CHECK-LABEL: atom8
-define i32 @atom8(i32* %subr, i32 %val) {
+define i32 @atom8(ptr %subr, i32 %val) {
; CHECK: atom.or.b32
- %ret = atomicrmw or i32* %subr, i32 %val seq_cst
+ %ret = atomicrmw or ptr %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK-LABEL: atom9
-define i64 @atom9(i64* %subr, i64 %val) {
+define i64 @atom9(ptr %subr, i64 %val) {
; CHECK: atom.or.b64
- %ret = atomicrmw or i64* %subr, i64 %val seq_cst
+ %ret = atomicrmw or ptr %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK-LABEL: atom10
-define i32 @atom10(i32* %subr, i32 %val) {
+define i32 @atom10(ptr %subr, i32 %val) {
; CHECK: atom.xor.b32
- %ret = atomicrmw xor i32* %subr, i32 %val seq_cst
+ %ret = atomicrmw xor ptr %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK-LABEL: atom11
-define i64 @atom11(i64* %subr, i64 %val) {
+define i64 @atom11(ptr %subr, i64 %val) {
; CHECK: atom.xor.b64
- %ret = atomicrmw xor i64* %subr, i64 %val seq_cst
+ %ret = atomicrmw xor ptr %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK-LABEL: atom12
-define i32 @atom12(i32* %subr, i32 %val) {
+define i32 @atom12(ptr %subr, i32 %val) {
; CHECK: atom.max.s32
- %ret = atomicrmw max i32* %subr, i32 %val seq_cst
+ %ret = atomicrmw max ptr %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK-LABEL: atom13
-define i64 @atom13(i64* %subr, i64 %val) {
+define i64 @atom13(ptr %subr, i64 %val) {
; CHECK: atom.max.s64
- %ret = atomicrmw max i64* %subr, i64 %val seq_cst
+ %ret = atomicrmw max ptr %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK-LABEL: atom14
-define i32 @atom14(i32* %subr, i32 %val) {
+define i32 @atom14(ptr %subr, i32 %val) {
; CHECK: atom.min.s32
- %ret = atomicrmw min i32* %subr, i32 %val seq_cst
+ %ret = atomicrmw min ptr %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK-LABEL: atom15
-define i64 @atom15(i64* %subr, i64 %val) {
+define i64 @atom15(ptr %subr, i64 %val) {
; CHECK: atom.min.s64
- %ret = atomicrmw min i64* %subr, i64 %val seq_cst
+ %ret = atomicrmw min ptr %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK-LABEL: atom16
-define i32 @atom16(i32* %subr, i32 %val) {
+define i32 @atom16(ptr %subr, i32 %val) {
; CHECK: atom.max.u32
- %ret = atomicrmw umax i32* %subr, i32 %val seq_cst
+ %ret = atomicrmw umax ptr %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK-LABEL: atom17
-define i64 @atom17(i64* %subr, i64 %val) {
+define i64 @atom17(ptr %subr, i64 %val) {
; CHECK: atom.max.u64
- %ret = atomicrmw umax i64* %subr, i64 %val seq_cst
+ %ret = atomicrmw umax ptr %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK-LABEL: atom18
-define i32 @atom18(i32* %subr, i32 %val) {
+define i32 @atom18(ptr %subr, i32 %val) {
; CHECK: atom.min.u32
- %ret = atomicrmw umin i32* %subr, i32 %val seq_cst
+ %ret = atomicrmw umin ptr %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK-LABEL: atom19
-define i64 @atom19(i64* %subr, i64 %val) {
+define i64 @atom19(ptr %subr, i64 %val) {
; CHECK: atom.min.u64
- %ret = atomicrmw umin i64* %subr, i64 %val seq_cst
+ %ret = atomicrmw umin ptr %subr, i64 %val seq_cst
ret i64 %ret
}
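; Not exercised in the series above: a minimal sketch (hypothetical) of atomicrmw xchg,
; which is expected to lower to atom.exch much like the scoped exch intrinsics above.
define i32 @atom_xchg_sketch(ptr %addr, i32 %val) {
  %ret = atomicrmw xchg ptr %addr, i32 %val seq_cst
  ret i32 %ret
}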
-declare float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %addr, float %val)
+declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
; CHECK-LABEL: atomic_add_f32_generic
-define float @atomic_add_f32_generic(float* %addr, float %val) {
+define float @atomic_add_f32_generic(ptr %addr, float %val) {
; CHECK: atom.add.f32
- %ret = call float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %addr, float %val)
+ %ret = call float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
ret float %ret
}
-declare float @llvm.nvvm.atomic.load.add.f32.p1f32(float addrspace(1)* %addr, float %val)
+declare float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)
; CHECK-LABEL: atomic_add_f32_addrspace1
-define float @atomic_add_f32_addrspace1(float addrspace(1)* %addr, float %val) {
+define float @atomic_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
; CHECK: atom.global.add.f32
- %ret = call float @llvm.nvvm.atomic.load.add.f32.p1f32(float addrspace(1)* %addr, float %val)
+ %ret = call float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)
ret float %ret
}
-declare float @llvm.nvvm.atomic.load.add.f32.p3f32(float addrspace(3)* %addr, float %val)
+declare float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)
; CHECK-LABEL: atomic_add_f32_addrspace3
-define float @atomic_add_f32_addrspace3(float addrspace(3)* %addr, float %val) {
+define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
; CHECK: atom.shared.add.f32
- %ret = call float @llvm.nvvm.atomic.load.add.f32.p3f32(float addrspace(3)* %addr, float %val)
+ %ret = call float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)
ret float %ret
}
; CHECK-LABEL: atomicrmw_add_f32_generic
-define float @atomicrmw_add_f32_generic(float* %addr, float %val) {
+define float @atomicrmw_add_f32_generic(ptr %addr, float %val) {
; CHECK: atom.add.f32
- %ret = atomicrmw fadd float* %addr, float %val seq_cst
+ %ret = atomicrmw fadd ptr %addr, float %val seq_cst
ret float %ret
}
; CHECK-LABEL: atomicrmw_add_f32_addrspace1
-define float @atomicrmw_add_f32_addrspace1(float addrspace(1)* %addr, float %val) {
+define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
; CHECK: atom.global.add.f32
- %ret = atomicrmw fadd float addrspace(1)* %addr, float %val seq_cst
+ %ret = atomicrmw fadd ptr addrspace(1) %addr, float %val seq_cst
ret float %ret
}
; CHECK-LABEL: atomicrmw_add_f32_addrspace3
-define float @atomicrmw_add_f32_addrspace3(float addrspace(3)* %addr, float %val) {
+define float @atomicrmw_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
; CHECK: atom.shared.add.f32
- %ret = atomicrmw fadd float addrspace(3)* %addr, float %val seq_cst
+ %ret = atomicrmw fadd ptr addrspace(3) %addr, float %val seq_cst
ret float %ret
}
; CHECK-LABEL: atomic_cmpxchg_i32
-define i32 @atomic_cmpxchg_i32(i32* %addr, i32 %cmp, i32 %new) {
+define i32 @atomic_cmpxchg_i32(ptr %addr, i32 %cmp, i32 %new) {
; CHECK: atom.cas.b32
- %pairold = cmpxchg i32* %addr, i32 %cmp, i32 %new seq_cst seq_cst
+ %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst
ret i32 %new
}
; CHECK-LABEL: atomic_cmpxchg_i64
-define i64 @atomic_cmpxchg_i64(i64* %addr, i64 %cmp, i64 %new) {
+define i64 @atomic_cmpxchg_i64(ptr %addr, i64 %cmp, i64 %new) {
; CHECK: atom.cas.b64
- %pairold = cmpxchg i64* %addr, i64 %cmp, i64 %new seq_cst seq_cst
+ %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst
ret i64 %new
}
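; The two tests above return %new and discard the cmpxchg result pair; a minimal sketch
; (hypothetical) of recovering the previously stored value from that pair:
define i32 @cmpxchg_old_value_sketch(ptr %addr, i32 %cmp, i32 %new) {
  %pair = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}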
%float4 = type { float, float, float, float }
%float3 = type { float, float, float }
%int3 = type { i32, i32, i32 }
-%struct.spam.2 = type { %struct.foo.3, i16*, float, float, i32, float }
-%struct.foo.3 = type <{ %float4*, %float4*, %float4*, i32*, i32*, i32, i32, float }>
+%struct.spam.2 = type { %struct.foo.3, ptr, float, float, i32, float }
+%struct.foo.3 = type <{ ptr, ptr, ptr, ptr, ptr, i32, i32, float }>
%struct.zot = type { %struct.bar, [8 x i8], %struct.foo, [12 x i8] }
-%struct.bar = type { i32 (...)** }
-%struct.foo = type <{ i16*, %float4, %int3, i32, %float3, [4 x i8], i64, i32, i8, [3 x i8], i32 }>
+%struct.bar = type { ptr }
+%struct.foo = type <{ ptr, %float4, %int3, i32, %float3, [4 x i8], i64, i32, i8, [3 x i8], i32 }>
@global = external local_unnamed_addr addrspace(4) externally_initialized global [27 x %char3], align 1
-@global_1 = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* inttoptr (i64 16 to i8*), i8* null, i8* null] }, align 8
+@global_1 = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 16 to ptr), ptr null, ptr null] }, align 8
; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
declare %float4 @snork(float) local_unnamed_addr
declare i64 @foo() local_unnamed_addr
-define void @barney(%struct.spam.2* nocapture readonly %arg) local_unnamed_addr {
+define void @barney(ptr nocapture readonly %arg) local_unnamed_addr {
bb:
tail call void asm sideeffect "// KEEP", ""() #1
%tmp = alloca %struct.zot, align 16
- %tmp4 = getelementptr inbounds %struct.spam.2, %struct.spam.2* %arg, i64 0, i32 1
- %tmp5 = load i16*, i16** %tmp4, align 8
- %tmp6 = bitcast %struct.zot* %tmp to i8*
- %tmp9 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 1
- %0 = bitcast %float4* %tmp9 to i16**
- store i16* %tmp5, i16** %0, align 8
- %tmp10 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 0, i32 0
- store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @global_1, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %tmp10, align 16
- %tmp34 = getelementptr %struct.spam.2, %struct.spam.2* %arg, i64 0, i32 0, i32 0
+ %tmp4 = getelementptr inbounds %struct.spam.2, ptr %arg, i64 0, i32 1
+ %tmp5 = load ptr, ptr %tmp4, align 8
+ %tmp9 = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 1
+ store ptr %tmp5, ptr %tmp9, align 8
+ store ptr getelementptr inbounds ({ [3 x ptr] }, ptr @global_1, i64 0, inrange i32 0, i64 3), ptr %tmp, align 16
%tmp.i1 = tail call i64 @foo()
- %tmp44.i16 = getelementptr inbounds i16, i16* %tmp5, i64 undef
- %tmp45.i17 = load i16, i16* %tmp44.i16, align 2
+ %tmp44.i16 = getelementptr inbounds i16, ptr %tmp5, i64 undef
+ %tmp45.i17 = load i16, ptr %tmp44.i16, align 2
%tmp47.i18 = icmp eq i16 %tmp45.i17, -1
br i1 %tmp47.i18, label %bb14, label %bb49.i.lr.ph
bb49.i.lr.ph: ; preds = %bb
- %tmp16 = bitcast %struct.zot* %tmp to i8**
- %tmp7.i6 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2
+ %tmp7.i6 = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2
%extract = lshr i16 %tmp45.i17, 11
%extract.t = trunc i16 %extract to i8
- %1 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2
- %tmp58.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 1, i32 2
- %tmp59.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 4, i32 2
- %tmp62.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 2, i32 2
- %2 = getelementptr inbounds %struct.foo, %struct.foo* %1, i64 1
- %3 = bitcast %struct.foo* %2 to i8*
- %tmp64.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 10
- %tmp19.i.i = load float, float* %tmp58.i, align 16
- %tmp23.i.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 2
- %4 = bitcast %int3* %tmp23.i.i to float*
- %tmp24.i.i = load float, float* %4, align 8
- %5 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 5, i64 0
- %6 = bitcast i8* %5 to float*
- %.repack3.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 6
- %7 = bitcast i64* %.repack3.i to float*
- %tmp41.i.i = load i32, i32* %tmp62.i, align 16
- %tmp48.i.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 3
- %tmp49.i.i = load i32, i32* %tmp48.i.i, align 4
- %tmp54.i.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 4
- %8 = bitcast %float3* %tmp54.i.i to i32*
- %tmp55.i.i = load i32, i32* %8, align 8
- %tmp9.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 7
- %9 = bitcast i32* %tmp9.i to i64*
- %tmp40.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 4, i32 1
- %10 = bitcast float* %tmp40.i to i32*
- %tmp41.i = load i32, i32* %10, align 4
+ %0 = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2
+ %tmp58.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 1, i32 2
+ %tmp59.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 4, i32 2
+ %tmp62.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 2, i32 2
+ %1 = getelementptr inbounds %struct.foo, ptr %0, i64 1
+ %tmp64.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 10
+ %tmp19.i.i = load float, ptr %tmp58.i, align 16
+ %tmp23.i.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 2
+ %tmp24.i.i = load float, ptr %tmp23.i.i, align 8
+ %2 = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 5, i64 0
+ %.repack3.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 6
+ %tmp41.i.i = load i32, ptr %tmp62.i, align 16
+ %tmp48.i.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 3
+ %tmp49.i.i = load i32, ptr %tmp48.i.i, align 4
+ %tmp54.i.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 4
+ %tmp55.i.i = load i32, ptr %tmp54.i.i, align 8
+ %tmp9.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 7
+ %tmp40.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 4, i32 1
+ %tmp41.i = load i32, ptr %tmp40.i, align 4
%tmp42.i = zext i32 %tmp41.i to i64
- %tmp7.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2
- %tmp17.pre = load i8*, i8** %tmp16, align 16
- %tmp60.i.peel = bitcast %struct.foo* %tmp7.i6 to i32**
- %tmp61.i.peel = load i32*, i32** %tmp60.i.peel, align 16
+ %tmp7.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2
+ %tmp17.pre = load ptr, ptr %tmp, align 16
+ %tmp61.i.peel = load ptr, ptr %tmp7.i6, align 16
%tmp10.i.i.peel = add nsw i8 %extract.t, -1
- store i8 %tmp10.i.i.peel, i8* %3, align 4
+ store i8 %tmp10.i.i.peel, ptr %1, align 4
%tmp13.i.i.peel = tail call %float3 @zot() #1
%tmp15.i.i.peel = extractvalue %float3 %tmp13.i.i.peel, 0
%tmp22.i.i.peel = fsub contract float %tmp19.i.i, %tmp15.i.i.peel
%tmp27.i.i.peel = fsub contract float %tmp24.i.i, %tmp17.i.i.peel
%tmp28.i.i.peel = tail call %float3 @bar_2(float %tmp22.i.i.peel, float %tmp27.i.i.peel) #1
%tmp28.i.elt.i.peel = extractvalue %float3 %tmp28.i.i.peel, 0
- store float %tmp28.i.elt.i.peel, float* %tmp59.i, align 16
+ store float %tmp28.i.elt.i.peel, ptr %tmp59.i, align 16
%tmp28.i.elt2.i.peel = extractvalue %float3 %tmp28.i.i.peel, 1
- store float %tmp28.i.elt2.i.peel, float* %6, align 4
+ store float %tmp28.i.elt2.i.peel, ptr %2, align 4
%tmp28.i.elt4.i.peel = extractvalue %float3 %tmp28.i.i.peel, 2
- store float %tmp28.i.elt4.i.peel, float* %7, align 8
+ store float %tmp28.i.elt4.i.peel, ptr %.repack3.i, align 8
%tmp38.i.i.peel = zext i8 %tmp10.i.i.peel to i64
- %tmp39.i5.i.peel = getelementptr inbounds [27 x %char3], [27 x %char3] addrspace(4)* @global, i64 0, i64 %tmp38.i.i.peel
- %tmp39.i.i.peel = addrspacecast %char3 addrspace(4)* %tmp39.i5.i.peel to %char3*
- %tmp42.i.i.peel = getelementptr inbounds %char3, %char3* %tmp39.i.i.peel, i64 0, i32 0
- %tmp43.i.i.peel = load i8, i8* %tmp42.i.i.peel, align 1
+ %tmp39.i5.i.peel = getelementptr inbounds [27 x %char3], ptr addrspace(4) @global, i64 0, i64 %tmp38.i.i.peel
+ %tmp39.i.i.peel = addrspacecast ptr addrspace(4) %tmp39.i5.i.peel to ptr
+ %tmp43.i.i.peel = load i8, ptr %tmp39.i.i.peel, align 1
%tmp44.i.i.peel = sext i8 %tmp43.i.i.peel to i32
%tmp45.i.i.peel = add nsw i32 %tmp41.i.i, %tmp44.i.i.peel
- %tmp50.i.i.peel = getelementptr inbounds %char3, %char3* %tmp39.i.i.peel, i64 0, i32 1
- %tmp51.i.i.peel = load i8, i8* %tmp50.i.i.peel, align 1
+ %tmp50.i.i.peel = getelementptr inbounds %char3, ptr %tmp39.i.i.peel, i64 0, i32 1
+ %tmp51.i.i.peel = load i8, ptr %tmp50.i.i.peel, align 1
%tmp52.i.i.peel = sext i8 %tmp51.i.i.peel to i32
%tmp53.i.i.peel = add nsw i32 %tmp49.i.i, %tmp52.i.i.peel
- %tmp56.i.i.peel = getelementptr inbounds %char3, %char3* %tmp39.i.i.peel, i64 0, i32 2
- %tmp57.i.i.peel = load i8, i8* %tmp56.i.i.peel, align 1
+ %tmp56.i.i.peel = getelementptr inbounds %char3, ptr %tmp39.i.i.peel, i64 0, i32 2
+ %tmp57.i.i.peel = load i8, ptr %tmp56.i.i.peel, align 1
%tmp58.i.i.peel = sext i8 %tmp57.i.i.peel to i32
%tmp59.i.i.peel = add nsw i32 %tmp55.i.i, %tmp58.i.i.peel
%tmp60.i.i.peel = tail call %int3 @hoge(i32 %tmp45.i.i.peel, i32 %tmp53.i.i.peel, i32 %tmp59.i.i.peel) #1
- %tmp61.i.i.peel = getelementptr inbounds i32, i32* %tmp61.i.peel, i64 undef
- %tmp62.i.i.peel = load i32, i32* %tmp61.i.i.peel, align 4
- store i32 %tmp62.i.i.peel, i32* %tmp64.i, align 8
- %tmp22.peel = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2
- %11 = bitcast %struct.foo* %tmp22.peel to i8*
- %tmp24.peel = getelementptr inbounds i8, i8* %11, i64 80
- %12 = bitcast i8* %tmp24.peel to i32*
- %tmp25.peel = load i32, i32* %12, align 16
- %tmp36.peel = load %float4*, %float4** %tmp34, align 8
+ %tmp61.i.i.peel = getelementptr inbounds i32, ptr %tmp61.i.peel, i64 undef
+ %tmp62.i.i.peel = load i32, ptr %tmp61.i.i.peel, align 4
+ store i32 %tmp62.i.i.peel, ptr %tmp64.i, align 8
+ %tmp22.peel = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2
+ %tmp24.peel = getelementptr inbounds i8, ptr %tmp22.peel, i64 80
+ %tmp25.peel = load i32, ptr %tmp24.peel, align 16
+ %tmp36.peel = load ptr, ptr %arg, align 8
%tmp37.peel = zext i32 %tmp25.peel to i64
- %tmp38.peel = getelementptr inbounds %float4, %float4* %tmp36.peel, i64 %tmp37.peel
- %tmp39.peel = bitcast %float4* %tmp38.peel to i8*
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %tmp39.peel, i64 undef, i1 false)
- %tmp40.peel = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 4, i32 2
- %tmp41.peel25 = getelementptr inbounds float, float* %tmp40.peel, i64 2
- %tmp42.peel = load float, float* %tmp41.peel25, align 8
- %tmp44.peel = load float, float* inttoptr (i64 8 to float*), align 8
+ %tmp38.peel = getelementptr inbounds %float4, ptr %tmp36.peel, i64 %tmp37.peel
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 undef, ptr align 1 %tmp38.peel, i64 undef, i1 false)
+ %tmp40.peel = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 4, i32 2
+ %tmp41.peel25 = getelementptr inbounds float, ptr %tmp40.peel, i64 2
+ %tmp42.peel = load float, ptr %tmp41.peel25, align 8
+ %tmp44.peel = load float, ptr inttoptr (i64 8 to ptr), align 8
%tmp45.peel = fsub contract float %tmp42.peel, %tmp44.peel
%tmp46.peel = tail call %float4 @snork(float %tmp45.peel)
%tmp.i.peel = tail call i64 @foo()
- %tmp10.i.peel = load i64, i64* %9, align 16
+ %tmp10.i.peel = load i64, ptr %tmp9.i, align 16
%tmp11.i.peel = add i64 %tmp10.i.peel, %tmp.i.peel
- store i64 %tmp11.i.peel, i64* %9, align 16, !tbaa !1
+ store i64 %tmp11.i.peel, ptr %tmp9.i, align 16, !tbaa !1
%tmp43.i.peel = add i64 %tmp11.i.peel, %tmp42.i
- %tmp44.i.peel = getelementptr inbounds i16, i16* %tmp5, i64 %tmp43.i.peel
- %tmp45.i.peel = load i16, i16* %tmp44.i.peel, align 2
+ %tmp44.i.peel = getelementptr inbounds i16, ptr %tmp5, i64 %tmp43.i.peel
+ %tmp45.i.peel = load i16, ptr %tmp44.i.peel, align 2
%tmp47.i.peel = icmp eq i16 %tmp45.i.peel, -1
%extract21.peel = lshr i16 %tmp45.i.peel, 11
%extract.t22.peel = trunc i16 %extract21.peel to i8
br i1 %tmp47.i.peel, label %bb14, label %bb49.i.lr.ph.peel.newph
bb49.i.lr.ph.peel.newph: ; preds = %bb49.i.lr.ph
- %tmp60.i = bitcast %struct.foo* %tmp7.i to i32**
- %tmp61.i = load i32*, i32** %tmp60.i, align 16
- %tmp61.i.i = getelementptr inbounds i32, i32* %tmp61.i, i64 undef
- %tmp18 = getelementptr i8, i8* %tmp17.pre, i64 -24
- %tmp19 = bitcast i8* %tmp18 to i64*
+ %tmp61.i = load ptr, ptr %tmp7.i, align 16
+ %tmp61.i.i = getelementptr inbounds i32, ptr %tmp61.i, i64 undef
+ %tmp18 = getelementptr i8, ptr %tmp17.pre, i64 -24
br label %bb49.i
bb49.i: ; preds = %bb49.i, %bb49.i.lr.ph.peel.newph
%tmp45.i20.off11 = phi i8 [ %extract.t22.peel, %bb49.i.lr.ph.peel.newph ], [ %extract.t22, %bb49.i ]
%tmp10.i.i = add nsw i8 %tmp45.i20.off11, -1
- store i8 %tmp10.i.i, i8* %3, align 4
+ store i8 %tmp10.i.i, ptr %1, align 4
%tmp13.i.i = tail call %float3 @zot() #1
%tmp15.i.i = extractvalue %float3 %tmp13.i.i, 0
%tmp22.i.i = fsub contract float %tmp19.i.i, %tmp15.i.i
%tmp27.i.i = fsub contract float %tmp24.i.i, %tmp17.i.i
%tmp28.i.i = tail call %float3 @bar_2(float %tmp22.i.i, float %tmp27.i.i) #1
%tmp28.i.elt.i = extractvalue %float3 %tmp28.i.i, 0
- store float %tmp28.i.elt.i, float* %tmp59.i, align 16
+ store float %tmp28.i.elt.i, ptr %tmp59.i, align 16
%tmp28.i.elt2.i = extractvalue %float3 %tmp28.i.i, 1
- store float %tmp28.i.elt2.i, float* %6, align 4
+ store float %tmp28.i.elt2.i, ptr %2, align 4
%tmp28.i.elt4.i = extractvalue %float3 %tmp28.i.i, 2
- store float %tmp28.i.elt4.i, float* %7, align 8
+ store float %tmp28.i.elt4.i, ptr %.repack3.i, align 8
%tmp38.i.i = zext i8 %tmp10.i.i to i64
- %tmp39.i5.i = getelementptr inbounds [27 x %char3], [27 x %char3] addrspace(4)* @global, i64 0, i64 %tmp38.i.i
- %tmp39.i.i = addrspacecast %char3 addrspace(4)* %tmp39.i5.i to %char3*
- %tmp42.i.i = getelementptr inbounds %char3, %char3* %tmp39.i.i, i64 0, i32 0
- %tmp43.i.i = load i8, i8* %tmp42.i.i, align 1
+ %tmp39.i5.i = getelementptr inbounds [27 x %char3], ptr addrspace(4) @global, i64 0, i64 %tmp38.i.i
+ %tmp39.i.i = addrspacecast ptr addrspace(4) %tmp39.i5.i to ptr
+ %tmp43.i.i = load i8, ptr %tmp39.i.i, align 1
%tmp44.i.i = sext i8 %tmp43.i.i to i32
%tmp45.i.i = add nsw i32 %tmp41.i.i, %tmp44.i.i
- %tmp50.i.i = getelementptr inbounds %char3, %char3* %tmp39.i.i, i64 0, i32 1
- %tmp51.i.i = load i8, i8* %tmp50.i.i, align 1
+ %tmp50.i.i = getelementptr inbounds %char3, ptr %tmp39.i.i, i64 0, i32 1
+ %tmp51.i.i = load i8, ptr %tmp50.i.i, align 1
%tmp52.i.i = sext i8 %tmp51.i.i to i32
%tmp53.i.i = add nsw i32 %tmp49.i.i, %tmp52.i.i
- %tmp56.i.i = getelementptr inbounds %char3, %char3* %tmp39.i.i, i64 0, i32 2
- %tmp57.i.i = load i8, i8* %tmp56.i.i, align 1
+ %tmp56.i.i = getelementptr inbounds %char3, ptr %tmp39.i.i, i64 0, i32 2
+ %tmp57.i.i = load i8, ptr %tmp56.i.i, align 1
%tmp58.i.i = sext i8 %tmp57.i.i to i32
%tmp59.i.i = add nsw i32 %tmp55.i.i, %tmp58.i.i
%tmp60.i.i = tail call %int3 @hoge(i32 %tmp45.i.i, i32 %tmp53.i.i, i32 %tmp59.i.i) #1
- %tmp62.i.i = load i32, i32* %tmp61.i.i, align 4
- store i32 %tmp62.i.i, i32* %tmp64.i, align 8
- %tmp20 = load i64, i64* %tmp19, align 8
- %tmp22 = getelementptr inbounds i8, i8* %tmp6, i64 %tmp20
- %tmp24 = getelementptr inbounds i8, i8* %tmp22, i64 80
- %13 = bitcast i8* %tmp24 to i32*
- %tmp25 = load i32, i32* %13, align 4
- %tmp36 = load %float4*, %float4** %tmp34, align 8
+ %tmp62.i.i = load i32, ptr %tmp61.i.i, align 4
+ store i32 %tmp62.i.i, ptr %tmp64.i, align 8
+ %tmp20 = load i64, ptr %tmp18, align 8
+ %tmp22 = getelementptr inbounds i8, ptr %tmp, i64 %tmp20
+ %tmp24 = getelementptr inbounds i8, ptr %tmp22, i64 80
+ %tmp25 = load i32, ptr %tmp24, align 4
+ %tmp36 = load ptr, ptr %arg, align 8
%tmp37 = zext i32 %tmp25 to i64
- %tmp38 = getelementptr inbounds %float4, %float4* %tmp36, i64 %tmp37
- %tmp39 = bitcast %float4* %tmp38 to i8*
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %tmp39, i64 undef, i1 false)
- %tmp40 = getelementptr inbounds i8, i8* %tmp22, i64 48
- %tmp41 = getelementptr inbounds i8, i8* %tmp40, i64 8
- %14 = bitcast i8* %tmp41 to float*
- %tmp42 = load float, float* %14, align 4
- %tmp44 = load float, float* inttoptr (i64 8 to float*), align 8
+ %tmp38 = getelementptr inbounds %float4, ptr %tmp36, i64 %tmp37
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 undef, ptr align 1 %tmp38, i64 undef, i1 false)
+ %tmp40 = getelementptr inbounds i8, ptr %tmp22, i64 48
+ %tmp41 = getelementptr inbounds i8, ptr %tmp40, i64 8
+ %tmp42 = load float, ptr %tmp41, align 4
+ %tmp44 = load float, ptr inttoptr (i64 8 to ptr), align 8
%tmp45 = fsub contract float %tmp42, %tmp44
%tmp46 = tail call %float4 @snork(float %tmp45)
%tmp.i = tail call i64 @foo()
- %tmp10.i = load i64, i64* %9, align 16
+ %tmp10.i = load i64, ptr %tmp9.i, align 16
%tmp11.i = add i64 %tmp10.i, %tmp.i
- store i64 %tmp11.i, i64* %9, align 16, !tbaa !1
+ store i64 %tmp11.i, ptr %tmp9.i, align 16, !tbaa !1
%tmp43.i = add i64 %tmp11.i, %tmp42.i
- %tmp44.i = getelementptr inbounds i16, i16* %tmp5, i64 %tmp43.i
- %tmp45.i = load i16, i16* %tmp44.i, align 2
+ %tmp44.i = getelementptr inbounds i16, ptr %tmp5, i64 %tmp43.i
+ %tmp45.i = load i16, ptr %tmp44.i, align 2
%tmp47.i = icmp eq i16 %tmp45.i, -1
%extract21 = lshr i16 %tmp45.i, 11
%extract.t22 = trunc i16 %extract21 to i8
!nvvm.annotations = !{!0}
-!0 = !{void (%struct.spam.2*)* @barney, !"kernel", i32 1}
+!0 = !{ptr @barney, !"kernel", i32 1}
!1 = !{!2, !11, i64 64}
!2 = !{!"_ZTSN7cuneibs22neiblist_iterator_coreE", !3, i64 0, !3, i64 8, !6, i64 16, !8, i64 32, !9, i64 44, !10, i64 48, !11, i64 64, !9, i64 72, !4, i64 76, !9, i64 80}
!3 = !{!"any pointer", !4, i64 0}
@"bfloat_array" = addrspace(1) constant [4 x bfloat]
[bfloat 0xR0201, bfloat 0xR0403, bfloat 0xR0605, bfloat 0xR0807]
-define void @test_load_store(bfloat addrspace(1)* %in, bfloat addrspace(1)* %out) {
+define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_load_store
; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
- %val = load bfloat, bfloat addrspace(1)* %in
- store bfloat %val, bfloat addrspace(1) * %out
+ %val = load bfloat, ptr addrspace(1) %in
+ store bfloat %val, ptr addrspace(1) %out
ret void
}
-define void @test_bitcast_from_bfloat(bfloat addrspace(1)* %in, i16 addrspace(1)* %out) {
+define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_bitcast_from_bfloat
; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
- %val = load bfloat, bfloat addrspace(1) * %in
+ %val = load bfloat, ptr addrspace(1) %in
%val_int = bitcast bfloat %val to i16
- store i16 %val_int, i16 addrspace(1)* %out
+ store i16 %val_int, ptr addrspace(1) %out
ret void
}
-define void @test_bitcast_to_bfloat(bfloat addrspace(1)* %out, i16 addrspace(1)* %in) {
+define void @test_bitcast_to_bfloat(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; CHECK-LABEL: @test_bitcast_to_bfloat
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load i16, i16 addrspace(1)* %in
+ %val = load i16, ptr addrspace(1) %in
%val_fp = bitcast i16 %val to bfloat
- store bfloat %val_fp, bfloat addrspace(1)* %out
+ store bfloat %val_fp, ptr addrspace(1) %out
ret void
}
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
-define void @foo(i32 %x, float* %output) {
+define void @foo(i32 %x, ptr %output) {
; CHECK-LABEL: .visible .func foo(
; CHECK-NOT: bra.uni
; CHECK-NOT: LBB0_
br label %merge
merge:
- store float 2.0, float* %output
+ store float 2.0, ptr %output
ret void
}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

-define private ptx_device { double, double } @__utils1_MOD_trace(%"struct.array2_complex(kind=8).43.5.57"* noalias %m) {
+define private ptx_device { double, double } @__utils1_MOD_trace(ptr noalias %m) {
entry:
;unreachable
%t0 = insertvalue {double, double} undef, double 1.0, 0
}

%struct.descriptor_dimension.0.52 = type { i64, i64, i64 }
-%"struct.array2_complex(kind=8).37.18.70" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
-%"struct.array2_complex(kind=8).43.5.57" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+%"struct.array2_complex(kind=8).37.18.70" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+%"struct.array2_complex(kind=8).43.5.57" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
@replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096

; CHECK: .visible .entry __kernelgen_main
-define ptx_kernel void @__kernelgen_main(i32* nocapture %args, i32*) {
+define ptx_kernel void @__kernelgen_main(ptr nocapture %args, ptr) {
entry:
- %1 = tail call ptx_device { double, double } bitcast ({ double, double } (%"struct.array2_complex(kind=8).43.5.57"*)* @__utils1_MOD_trace to { double, double } (%"struct.array2_complex(kind=8).37.18.70"*)*)(%"struct.array2_complex(kind=8).37.18.70"* noalias @replacementOfAlloca8)
+ %1 = tail call ptx_device { double, double } @__utils1_MOD_trace(ptr noalias @replacementOfAlloca8)
ret void
}

%struct.S = type { i32, i32 }
; Function Attrs: nounwind
-define void @_Z11TakesStruct1SPi(%struct.S* byval(%struct.S) nocapture readonly %input, i32* nocapture %output) #0 {
+define void @_Z11TakesStruct1SPi(ptr byval(%struct.S) nocapture readonly %input, ptr nocapture %output) #0 {
entry:
; CHECK-LABEL: @_Z11TakesStruct1SPi
; PTX-LABEL: .visible .entry _Z11TakesStruct1SPi(
-; CHECK: addrspacecast %struct.S* %input to %struct.S addrspace(101)*
- %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
- %0 = load i32, i32* %b, align 4
+; CHECK: addrspacecast ptr %input to ptr addrspace(101)
+ %b = getelementptr inbounds %struct.S, ptr %input, i64 0, i32 1
+ %0 = load i32, ptr %b, align 4
; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}]
; PTX: ld.param.u32 [[value:%r[0-9]+]], [_Z11TakesStruct1SPi_param_0+4]
- store i32 %0, i32* %output, align 4
+ store i32 %0, ptr %output, align 4
; PTX-NEXT: st.global.u32 [{{%rd[0-9]+}}], [[value]]
ret void
}
!nvvm.annotations = !{!0}
-!0 = !{void (%struct.S*, i32*)* @_Z11TakesStruct1SPi, !"kernel", i32 1}
+!0 = !{ptr @_Z11TakesStruct1SPi, !"kernel", i32 1}
target triple = "nvptx64-nvidia-cuda"
; CHECK-LABEL: _Z3foobbbPb
-define void @_Z3foobbbPb(i1 zeroext %p1, i1 zeroext %p2, i1 zeroext %p3, i8* nocapture %output) {
+define void @_Z3foobbbPb(i1 zeroext %p1, i1 zeroext %p2, i1 zeroext %p3, ptr nocapture %output) {
entry:
; CHECK: selp.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}
%.sink.v = select i1 %p1, i1 %p2, i1 %p3
%frombool5 = zext i1 %.sink.v to i8
- store i8 %frombool5, i8* %output, align 1
+ store i8 %frombool5, ptr %output, align 1
ret void
}
; Function Attrs: nounwind
; CHECK-LABEL: some_kernel
-define void @some_kernel(%class.float3* nocapture %dst) #0 {
+define void @some_kernel(ptr nocapture %dst) #0 {
_ZL11compute_vecRK6float3jb.exit:
%ret_vec.sroa.8.i = alloca float, align 4
%0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
%3 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%4 = add nsw i32 %2, %3
%5 = zext i32 %4 to i64
- %6 = bitcast float* %ret_vec.sroa.8.i to i8*
- call void @llvm.lifetime.start.p0i8(i64 4, i8* %6)
- %7 = and i32 %4, 15
- %8 = icmp eq i32 %7, 0
- %9 = select i1 %8, float 0.000000e+00, float -1.000000e+00
- store float %9, float* %ret_vec.sroa.8.i, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr %ret_vec.sroa.8.i)
+ %6 = and i32 %4, 15
+ %7 = icmp eq i32 %6, 0
+ %8 = select i1 %7, float 0.000000e+00, float -1.000000e+00
+ store float %8, ptr %ret_vec.sroa.8.i, align 4
; CHECK: max.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, 0f00000000
- %10 = fcmp olt float %9, 0.000000e+00
- %ret_vec.sroa.8.i.val = load float, float* %ret_vec.sroa.8.i, align 4
- %11 = select i1 %10, float 0.000000e+00, float %ret_vec.sroa.8.i.val
- call void @llvm.lifetime.end.p0i8(i64 4, i8* %6)
- %12 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 0
- store float 0.000000e+00, float* %12, align 4
- %13 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 1
- store float %11, float* %13, align 4
- %14 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 2
- store float 0.000000e+00, float* %14, align 4
+ %9 = fcmp olt float %8, 0.000000e+00
+ %ret_vec.sroa.8.i.val = load float, ptr %ret_vec.sroa.8.i, align 4
+ %10 = select i1 %9, float 0.000000e+00, float %ret_vec.sroa.8.i.val
+ call void @llvm.lifetime.end.p0(i64 4, ptr %ret_vec.sroa.8.i)
+ %11 = getelementptr inbounds %class.float3, ptr %dst, i64 %5, i32 0
+ store float 0.000000e+00, ptr %11, align 4
+ %12 = getelementptr inbounds %class.float3, ptr %dst, i64 %5, i32 1
+ store float %10, ptr %12, align 4
+ %13 = getelementptr inbounds %class.float3, ptr %dst, i64 %5, i32 2
+ store float 0.000000e+00, ptr %13, align 4
ret void
}
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
; Function Attrs: nounwind
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #2
; Function Attrs: nounwind
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "no-signed-zeros-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!nvvm.annotations = !{!0}
!llvm.ident = !{!1}
-!0 = !{void (%class.float3*)* @some_kernel, !"kernel", i32 1}
+!0 = !{ptr @some_kernel, !"kernel", i32 1}
!1 = !{!"clang version 3.5.1 (tags/RELEASE_351/final)"}
target triple = "nvptx64-nvidia-cuda"
; CHECK-LABEL: spam
-define ptx_kernel void @spam(i8 addrspace(1)* noalias nocapture readonly %arg, i8 addrspace(1)* noalias nocapture %arg1, i64 %arg2, i64 %arg3) #0 {
+define ptx_kernel void @spam(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture %arg1, i64 %arg2, i64 %arg3) #0 {
bb:
- %tmp = bitcast i8 addrspace(1)* %arg to i16 addrspace(1)*
- %tmp4 = bitcast i8 addrspace(1)* %arg1 to i64 addrspace(1)*
%tmp5 = add nsw i64 %arg3, 8
- %tmp6 = getelementptr i16, i16 addrspace(1)* %tmp, i64 %tmp5
+ %tmp6 = getelementptr i16, ptr addrspace(1) %arg, i64 %tmp5
; CHECK: ld.global.nc.u16
- %tmp7 = load i16, i16 addrspace(1)* %tmp6, align 2
+ %tmp7 = load i16, ptr addrspace(1) %tmp6, align 2
; CHECK: cvt.s32.s16
%tmp8 = sext i16 %tmp7 to i64
%tmp9 = mul nsw i64 %tmp8, %tmp8
- %tmp10 = load i64, i64 addrspace(1)* %tmp4, align 8
+ %tmp10 = load i64, ptr addrspace(1) %arg1, align 8
%tmp11 = add nsw i64 %tmp9, %tmp10
- store i64 %tmp11, i64 addrspace(1)* %tmp4, align 8
+ store i64 %tmp11, ptr addrspace(1) %arg1, align 8
ret void
}
!nvvm.annotations = !{!0}
-!0 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i64, i64)* @spam, !"maxntidx", i64 1, !"maxntidy", i64 1, !"maxntidz", i64 1}
+!0 = !{ptr @spam, !"maxntidx", i64 1, !"maxntidy", i64 1, !"maxntidz", i64 1}
target triple = "nvptx64-unknown-unknown"
; CHECK-LABEL: ex_zext
-define void @ex_zext(i8* noalias readonly %data, i32* %res) {
+define void @ex_zext(ptr noalias readonly %data, ptr %res) {
entry:
; CHECK: ld.global.nc.u8
- %val = load i8, i8* %data
+ %val = load i8, ptr %data
; CHECK: cvt.u32.u8
%valext = zext i8 %val to i32
- store i32 %valext, i32* %res
+ store i32 %valext, ptr %res
ret void
}
; CHECK-LABEL: ex_sext
-define void @ex_sext(i8* noalias readonly %data, i32* %res) {
+define void @ex_sext(ptr noalias readonly %data, ptr %res) {
entry:
; CHECK: ld.global.nc.u8
- %val = load i8, i8* %data
+ %val = load i8, ptr %data
; CHECK: cvt.s32.s8
%valext = sext i8 %val to i32
- store i32 %valext, i32* %res
+ store i32 %valext, ptr %res
ret void
}
; CHECK-LABEL: ex_zext_v2
-define void @ex_zext_v2(<2 x i8>* noalias readonly %data, <2 x i32>* %res) {
+define void @ex_zext_v2(ptr noalias readonly %data, ptr %res) {
entry:
; CHECK: ld.global.nc.v2.u8
- %val = load <2 x i8>, <2 x i8>* %data
+ %val = load <2 x i8>, ptr %data
; CHECK: cvt.u32.u16
%valext = zext <2 x i8> %val to <2 x i32>
- store <2 x i32> %valext, <2 x i32>* %res
+ store <2 x i32> %valext, ptr %res
ret void
}
; CHECK-LABEL: ex_sext_v2
-define void @ex_sext_v2(<2 x i8>* noalias readonly %data, <2 x i32>* %res) {
+define void @ex_sext_v2(ptr noalias readonly %data, ptr %res) {
entry:
; CHECK: ld.global.nc.v2.u8
- %val = load <2 x i8>, <2 x i8>* %data
+ %val = load <2 x i8>, ptr %data
; CHECK: cvt.s32.s8
%valext = sext <2 x i8> %val to <2 x i32>
- store <2 x i32> %valext, <2 x i32>* %res
+ store <2 x i32> %valext, ptr %res
ret void
}
!nvvm.annotations = !{!0,!1,!2,!3}
-!0 = !{void (i8*, i32*)* @ex_zext, !"kernel", i32 1}
-!1 = !{void (i8*, i32*)* @ex_sext, !"kernel", i32 1}
-!2 = !{void (<2 x i8>*, <2 x i32>*)* @ex_zext_v2, !"kernel", i32 1}
-!3 = !{void (<2 x i8>*, <2 x i32>*)* @ex_sext_v2, !"kernel", i32 1}
+!0 = !{ptr @ex_zext, !"kernel", i32 1}
+!1 = !{ptr @ex_sext, !"kernel", i32 1}
+!2 = !{ptr @ex_zext_v2, !"kernel", i32 1}
+!3 = !{ptr @ex_sext_v2, !"kernel", i32 1}
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
-%func = type { i32 (i32, i32)** }
+%func = type { ptr }
; CHECK: foo
; CHECK: call
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
-%printf_args.0.8 = type { i8* }
+%printf_args.0.8 = type { ptr }
define internal i32 @__kmpc_get_hardware_thread_id_in_block(i1 %0) {
%2 = alloca %printf_args.0.8, i32 0, align 8
- %3 = bitcast %printf_args.0.8* %2 to i8*
br i1 true, label %._crit_edge1, label %._crit_edge
._crit_edge: ; preds = %1, %._crit_edge
- %4 = call i32 null(i8* null, i8* %3)
+ %3 = call i32 null(ptr null, ptr %2)
br i1 %0, label %._crit_edge, label %._crit_edge1
._crit_edge1: ; preds = %._crit_edge, %1
; 64-bit divides and rems should be split into a fast and slow path where
; the fast path uses a 32-bit operation.
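;
; As a rough, illustrative sketch (not the exact IR the backend produces, and
; kept in comments so it does not change this test), the bypass guards the
; 64-bit operation with a check that both operands fit in 32 bits; all names
; below are invented here, and the rem case is analogous:
;
;   %ab     = or i64 %a, %b
;   %hi     = lshr i64 %ab, 32
;   %fits32 = icmp eq i64 %hi, 0
;   br i1 %fits32, label %fast, label %slow
; fast:
;   %a32 = trunc i64 %a to i32
;   %b32 = trunc i64 %b to i32
;   %q32 = udiv i32 %a32, %b32          ; becomes div.u32
;   %qf  = zext i32 %q32 to i64
;   br label %join
; slow:
;   %qs  = udiv i64 %a, %b              ; becomes div.u64
;   br label %join
; join:
;   %q   = phi i64 [ %qf, %fast ], [ %qs, %slow ]
;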
-define void @sdiv64(i64 %a, i64 %b, i64* %retptr) {
+define void @sdiv64(i64 %a, i64 %b, ptr %retptr) {
; CHECK-LABEL: sdiv64(
; CHECK: div.s64
; CHECK: div.u32
; CHECK: ret
%d = sdiv i64 %a, %b
- store i64 %d, i64* %retptr
+ store i64 %d, ptr %retptr
ret void
}
-define void @udiv64(i64 %a, i64 %b, i64* %retptr) {
+define void @udiv64(i64 %a, i64 %b, ptr %retptr) {
; CHECK-LABEL: udiv64(
; CHECK: div.u64
; CHECK: div.u32
; CHECK: ret
%d = udiv i64 %a, %b
- store i64 %d, i64* %retptr
+ store i64 %d, ptr %retptr
ret void
}
-define void @srem64(i64 %a, i64 %b, i64* %retptr) {
+define void @srem64(i64 %a, i64 %b, ptr %retptr) {
; CHECK-LABEL: srem64(
; CHECK: rem.s64
; CHECK: rem.u32
; CHECK: ret
%d = srem i64 %a, %b
- store i64 %d, i64* %retptr
+ store i64 %d, ptr %retptr
ret void
}
-define void @urem64(i64 %a, i64 %b, i64* %retptr) {
+define void @urem64(i64 %a, i64 %b, ptr %retptr) {
; CHECK-LABEL: urem64(
; CHECK: rem.u64
; CHECK: rem.u32
; CHECK: ret
%d = urem i64 %a, %b
- store i64 %d, i64* %retptr
+ store i64 %d, ptr %retptr
ret void
}
-define void @sdiv32(i32 %a, i32 %b, i32* %retptr) {
+define void @sdiv32(i32 %a, i32 %b, ptr %retptr) {
; CHECK-LABEL: sdiv32(
; CHECK: div.s32
; CHECK-NOT: div.
%d = sdiv i32 %a, %b
- store i32 %d, i32* %retptr
+ store i32 %d, ptr %retptr
ret void
}
-define void @udiv32(i32 %a, i32 %b, i32* %retptr) {
+define void @udiv32(i32 %a, i32 %b, ptr %retptr) {
; CHECK-LABEL: udiv32(
; CHECK: div.u32
; CHECK-NOT: div.
%d = udiv i32 %a, %b
- store i32 %d, i32* %retptr
+ store i32 %d, ptr %retptr
ret void
}
-define void @srem32(i32 %a, i32 %b, i32* %retptr) {
+define void @srem32(i32 %a, i32 %b, ptr %retptr) {
; CHECK-LABEL: srem32(
; CHECK: rem.s32
; CHECK-NOT: rem.
%d = srem i32 %a, %b
- store i32 %d, i32* %retptr
+ store i32 %d, ptr %retptr
ret void
}
-define void @urem32(i32 %a, i32 %b, i32* %retptr) {
+define void @urem32(i32 %a, i32 %b, ptr %retptr) {
; CHECK-LABEL: urem32(
; CHECK: rem.u32
; CHECK-NOT: rem.
%d = urem i32 %a, %b
- store i32 %d, i32* %retptr
+ store i32 %d, ptr %retptr
ret void
}
; Checks how NVPTX lowers alloca buffers and their passing to functions.
;
; Produced with the following CUDA code:
; extern "C" __attribute__((device)) void callee(float* f, char* buf);
;
; extern "C" __attribute__((global)) void kernel_func(float* a) {
;   char buf[4 * sizeof(float)];
;   *(reinterpret_cast<float*>(&buf[0])) = a[0];
;   *(reinterpret_cast<float*>(&buf[1])) = a[1];
;   *(reinterpret_cast<float*>(&buf[2])) = a[2];
;   *(reinterpret_cast<float*>(&buf[3])) = a[3];
;   callee(a, buf);
; }
; CHECK: .visible .entry kernel_func
-define void @kernel_func(float* %a) {
+define void @kernel_func(ptr %a) {
entry:
%buf = alloca [16 x i8], align 4
; CHECK: ld.global.f32 %f[[A0_REG:[0-9]+]], [%rd[[A1_REG]]]
; CHECK: st.local.f32 [{{%rd[0-9]+}}], %f[[A0_REG]]
- %0 = load float, float* %a, align 4
- %1 = bitcast [16 x i8]* %buf to float*
- store float %0, float* %1, align 4
- %arrayidx2 = getelementptr inbounds float, float* %a, i64 1
- %2 = load float, float* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 1
- %3 = bitcast i8* %arrayidx3 to float*
- store float %2, float* %3, align 4
- %arrayidx4 = getelementptr inbounds float, float* %a, i64 2
- %4 = load float, float* %arrayidx4, align 4
- %arrayidx5 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 2
- %5 = bitcast i8* %arrayidx5 to float*
- store float %4, float* %5, align 4
- %arrayidx6 = getelementptr inbounds float, float* %a, i64 3
- %6 = load float, float* %arrayidx6, align 4
- %arrayidx7 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 3
- %7 = bitcast i8* %arrayidx7 to float*
- store float %6, float* %7, align 4
+ %0 = load float, ptr %a, align 4
+ store float %0, ptr %buf, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %a, i64 1
+ %1 = load float, ptr %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 1
+ store float %1, ptr %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %a, i64 2
+ %2 = load float, ptr %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 2
+ store float %2, ptr %arrayidx5, align 4
+ %arrayidx6 = getelementptr inbounds float, ptr %a, i64 3
+ %3 = load float, ptr %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 3
+ store float %3, ptr %arrayidx7, align 4
; CHECK: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A_REG]]
; CHECK-NEXT: call.uni
; CHECK-NEXT: callee,
- %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 0
- call void @callee(float* %a, i8* %arraydecay) #2
+ call void @callee(ptr %a, ptr %buf) #2
ret void
}
-declare void @callee(float*, i8*)
+declare void @callee(ptr, ptr)
!nvvm.annotations = !{!0}
-!0 = !{void (float*)* @kernel_func, !"kernel", i32 1}
+!0 = !{ptr @kernel_func, !"kernel", i32 1}
; CHECK-NEXT: _Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE,
define weak_odr void @foo() {
entry:
- %call.i.i.i = tail call %"class.complex" bitcast (%complex_half ()* @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE to %"class.complex" (i32, i32, %"class.complex"*)*)(i32 0, i32 0, %"class.complex"* byval(%"class.complex") null)
+ %call.i.i.i = tail call %"class.complex" @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE(i32 0, i32 0, ptr byval(%"class.complex") null)
ret void
}
;; Function pointers can escape, so we have to use a conservative
;; alignment for a function that has address taken.
;;
-declare i8* @usefp(i8* %fp)
+declare ptr @usefp(ptr %fp)
; CHECK: .func callee(
; CHECK-NEXT: .param .align 4 .b8 callee_param_0[4]
-define internal void @callee(%"class.complex"* byval(%"class.complex") %byval_arg) {
+define internal void @callee(ptr byval(%"class.complex") %byval_arg) {
ret void
}
define void @boom() {
- %fp = call i8* @usefp(i8* bitcast (void (%"class.complex"*)* @callee to i8*))
- %cast = bitcast i8* %fp to void (%"class.complex"*)*
+ %fp = call ptr @usefp(ptr @callee)
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.v2.b16 [param0+0]
; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]);
- call void %cast(%"class.complex"* byval(%"class.complex") null)
+ call void %fp(ptr byval(%"class.complex") null)
ret void
}
target triple = "nvptx"
-define void @foo(i8* %ptr) {
- %fnptr = bitcast i8* %ptr to void ()*
+define void @foo(ptr %ptr) {
; CHECK: prototype_0 : .callprototype ()_ ()
- tail call void %fnptr()
+ tail call void %ptr()
ret void
}
;; Kernel function using ptx_kernel calling conv
; CHECK: .entry kernel_func
-define ptx_kernel void @kernel_func(float* %a) {
+define ptx_kernel void @kernel_func(ptr %a) {
; CHECK: ret
ret void
}
;; Device function
; CHECK: .func device_func
-define void @device_func(float* %a) {
+define void @device_func(ptr %a) {
; CHECK: ret
ret void
}
;; Kernel function using NVVM metadata
; CHECK: .entry metadata_kernel
-define void @metadata_kernel(float* %a) {
+define void @metadata_kernel(ptr %a) {
; CHECK: ret
ret void
}
!nvvm.annotations = !{!1}
-!1 = !{void (float*)* @metadata_kernel, !"kernel", i32 1}
+!1 = !{ptr @metadata_kernel, !"kernel", i32 1}
declare void @conv() convergent
declare void @not_conv()
-define void @test(void ()* %f) {
+define void @test(ptr %f) {
; CHECK: ConvergentCallUniPrintCall
; CHECK-NEXT: @conv
call void @conv()
; Here we store the result of ctlz.16 into an i16 pointer, so the trunc should
; remain.
; CHECK-LABEL: myctlz_store16(
-define void @myctlz_store16(i16 %a, i16* %b) {
+define void @myctlz_store16(i16 %a, ptr %b) {
; CHECK: ld.param.
; CHECK-NEXT: cvt.u32.u16
; CHECK-NEXT: clz.b32
; CHECK: st.{{[a-z]}}16
; CHECK: ret;
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
- store i16 %val, i16* %b
+ store i16 %val, ptr %b
ret void
}
; CHECK-LABEL: myctlz_store16_2(
-define void @myctlz_store16_2(i16 %a, i16* %b) {
+define void @myctlz_store16_2(i16 %a, ptr %b) {
; CHECK: ld.param.
; CHECK-NEXT: cvt.u32.u16
; CHECK-NEXT: clz.b32
; CHECK: st.{{[a-z]}}16
; CHECK: ret;
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
- store i16 %val, i16* %b
+ store i16 %val, ptr %b
ret void
}
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O0 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O0 | %ptxas-verify %}
-define void @foo(i32* %output) {
+define void @foo(ptr %output) {
; CHECK-LABEL: .visible .func foo(
entry:
%local = alloca i32
; CHECK: __local_depot
- store i32 1, i32* %local
- %0 = load i32, i32* %local
- store i32 %0, i32* %output
+ store i32 1, ptr %local
+ %0 = load i32, ptr %local
+ store i32 %0, ptr %output
ret void
}
; Check that a div/rem pair with matching operands is computed with a single
; division (the remainder is derived from the quotient) during NVPTX isel, at
; -O2. At -O0, we should leave it alone.
; CHECK-LABEL: sdiv32(
-define void @sdiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @sdiv32(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
; CHECK: div.s32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
%quot = sdiv i32 %n, %d
%rem = srem i32 %n, %d
; O2: st{{.*}}[[quot]]
- store i32 %quot, i32* %quot_ret
+ store i32 %quot, ptr %quot_ret
; O2: st{{.*}}[[rem]]
- store i32 %rem, i32* %rem_ret
+ store i32 %rem, ptr %rem_ret
ret void
}
; CHECK-LABEL: udiv32(
-define void @udiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @udiv32(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
; CHECK: div.u32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
%quot = udiv i32 %n, %d
%rem = urem i32 %n, %d
; O2: st{{.*}}[[quot]]
- store i32 %quot, i32* %quot_ret
+ store i32 %quot, ptr %quot_ret
; O2: st{{.*}}[[rem]]
- store i32 %rem, i32* %rem_ret
+ store i32 %rem, ptr %rem_ret
ret void
}
; Check that we don't perform this optimization if one operation is signed and
; the other isn't.
; CHECK-LABEL: mismatched_types1(
-define void @mismatched_types1(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_types1(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
; CHECK: div.u32
; CHECK: rem.s32
%quot = udiv i32 %n, %d
%rem = srem i32 %n, %d
- store i32 %quot, i32* %quot_ret
- store i32 %rem, i32* %rem_ret
+ store i32 %quot, ptr %quot_ret
+ store i32 %rem, ptr %rem_ret
ret void
}
; CHECK-LABEL: mismatched_types2(
-define void @mismatched_types2(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_types2(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
; CHECK: div.s32
; CHECK: rem.u32
%quot = sdiv i32 %n, %d
%rem = urem i32 %n, %d
- store i32 %quot, i32* %quot_ret
- store i32 %rem, i32* %rem_ret
+ store i32 %quot, ptr %quot_ret
+ store i32 %rem, ptr %rem_ret
ret void
}
; Check that we don't perform this optimization if the inputs to the div don't
; match the inputs to the rem.
; CHECK-LABEL: mismatched_inputs1(
-define void @mismatched_inputs1(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_inputs1(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
; CHECK: div.s32
; CHECK: rem.s32
%quot = sdiv i32 %n, %d
%rem = srem i32 %d, %n
- store i32 %quot, i32* %quot_ret
- store i32 %rem, i32* %rem_ret
+ store i32 %quot, ptr %quot_ret
+ store i32 %rem, ptr %rem_ret
ret void
}
; CHECK-LABEL: mismatched_inputs2(
-define void @mismatched_inputs2(i32 %n1, i32 %n2, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_inputs2(i32 %n1, i32 %n2, i32 %d, ptr %quot_ret, ptr %rem_ret) {
; CHECK: div.s32
; CHECK: rem.s32
%quot = sdiv i32 %n1, %d
%rem = srem i32 %n2, %d
- store i32 %quot, i32* %quot_ret
- store i32 %rem, i32* %rem_ret
+ store i32 %quot, ptr %quot_ret
+ store i32 %rem, ptr %rem_ret
ret void
}
; CHECK-LABEL: mismatched_inputs3(
-define void @mismatched_inputs3(i32 %n, i32 %d1, i32 %d2, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_inputs3(i32 %n, i32 %d1, i32 %d2, ptr %quot_ret, ptr %rem_ret) {
; CHECK: div.s32
; CHECK: rem.s32
%quot = sdiv i32 %n, %d1
%rem = srem i32 %n, %d2
- store i32 %quot, i32* %quot_ret
- store i32 %rem, i32* %rem_ret
+ store i32 %quot, ptr %quot_ret
+ store i32 %rem, ptr %rem_ret
ret void
}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
-define void @foo(float* nocapture readonly %x_value, double* nocapture %output) #0 {
- %1 = bitcast float* %x_value to <4 x float>*
- %2 = load <4 x float>, <4 x float>* %1, align 16
- %3 = fpext <4 x float> %2 to <4 x double>
+define void @foo(ptr nocapture readonly %x_value, ptr nocapture %output) #0 {
+ %1 = load <4 x float>, ptr %x_value, align 16
+ %2 = fpext <4 x float> %1 to <4 x double>
; CHECK-NOT: ld.v2.f32 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}];
; CHECK: cvt.f64.f32
; CHECK: cvt.f64.f32
; CHECK: cvt.f64.f32
; CHECK: cvt.f64.f32
- %4 = bitcast double* %output to <4 x double>*
- store <4 x double> %3, <4 x double>* %4
+ store <4 x double> %2, ptr %output
ret void
}
; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1];
; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]];
; CHECK-NEXT: ret;
-define void @test_store(half %a, half* %b) #0 {
- store half %a, half* %b
+define void @test_store(half %a, ptr %b) #0 {
+ store half %a, ptr %b
ret void
}
; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
-define half @test_load(half* %a) #0 {
- %r = load half, half* %a
+define half @test_load(ptr %a) #0 {
+ %r = load half, ptr %a
ret half %r
}
; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
; CHECK: ret
-define void @test_halfp0a1(half * noalias readonly %from, half * %to) {
- %1 = load half, half * %from , align 1
- store half %1, half * %to , align 1
+define void @test_halfp0a1(ptr noalias readonly %from, ptr %to) {
+ %1 = load half, ptr %from , align 1
+ store half %1, ptr %to , align 1
ret void
}
; CHECK: [[LABEL]]:
; CHECK: st.u32 [%[[D]]],
; CHECK: ret;
-define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
+define void @test_br_cc(half %a, half %b, ptr %p1, ptr %p2) #0 {
%c = fcmp uge half %a, %b
br i1 %c, label %then, label %else
then:
- store i32 0, i32* %p1
+ store i32 0, ptr %p1
ret void
else:
- store i32 0, i32* %p2
+ store i32 0, ptr %p2
ret void
}
; CHECK: @[[PRED]] bra [[LOOP]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
-define half @test_phi(half* %p1) #0 {
+define half @test_phi(ptr %p1) #0 {
entry:
- %a = load half, half* %p1
+ %a = load half, ptr %p1
br label %loop
loop:
%r = phi half [%a, %entry], [%b, %loop]
- %b = load half, half* %p1
- %c = call i1 @test_dummy(half* %p1)
+ %b = load half, ptr %p1
+ %c = call i1 @test_dummy(ptr %p1)
br i1 %c, label %loop, label %return
return:
ret half %r
}
-declare i1 @test_dummy(half* %p1) #0
+declare i1 @test_dummy(ptr %p1) #0
; CHECK-LABEL: test_fptosi_i32(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0];
; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]];
; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]};
; CHECK: ret;
-define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) {
- %t1 = load <2 x half>, <2 x half>* %a
- store <2 x half> %t1, <2 x half>* %b, align 16
+define void @test_ldst_v2f16(ptr %a, ptr %b) {
+ %t1 = load <2 x half>, ptr %a
+ store <2 x half> %t1, ptr %b, align 16
ret void
}
; CHECK-DAG: st.u32 [%[[B]]],
; CHECK-DAG: st.b16 [%[[B]]+4],
; CHECK: ret;
-define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) {
- %t1 = load <3 x half>, <3 x half>* %a
- store <3 x half> %t1, <3 x half>* %b, align 16
+define void @test_ldst_v3f16(ptr %a, ptr %b) {
+ %t1 = load <3 x half>, ptr %a
+ store <3 x half> %t1, ptr %b, align 16
ret void
}
; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]];
; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: ret;
-define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) {
- %t1 = load <4 x half>, <4 x half>* %a
- store <4 x half> %t1, <4 x half>* %b, align 16
+define void @test_ldst_v4f16(ptr %a, ptr %b) {
+ %t1 = load <4 x half>, ptr %a
+ store <4 x half> %t1, ptr %b, align 16
ret void
}
; CHECK-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]];
; CHECK-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: ret;
-define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) {
- %t1 = load <8 x half>, <8 x half>* %a
- store <8 x half> %t1, <8 x half>* %b, align 16
+define void @test_ldst_v8f16(ptr %a, ptr %b) {
+ %t1 = load <8 x half>, ptr %a
+ store <8 x half> %t1, ptr %b, align 16
ret void
}
; CHECK-LABEL: @test_convert_fp16_to_fp32
; CHECK: cvt.f32.f16
-define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16, i16 addrspace(1)* %in, align 2
+define void @test_convert_fp16_to_fp32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %val = load i16, ptr addrspace(1) %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
- store float %cvt, float addrspace(1)* %out, align 4
+ store float %cvt, ptr addrspace(1) %out, align 4
ret void
}
; CHECK-LABEL: @test_convert_fp16_to_fp64
; CHECK: cvt.f64.f16
-define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16, i16 addrspace(1)* %in, align 2
+define void @test_convert_fp16_to_fp64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %val = load i16, ptr addrspace(1) %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
- store double %cvt, double addrspace(1)* %out, align 4
+ store double %cvt, ptr addrspace(1) %out, align 4
ret void
}
; CHECK-LABEL: @test_convert_fp32_to_fp16
; CHECK: cvt.rn.f16.f32
-define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float, float addrspace(1)* %in, align 2
+define void @test_convert_fp32_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %val = load float, ptr addrspace(1) %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
- store i16 %cvt, i16 addrspace(1)* %out, align 4
+ store i16 %cvt, ptr addrspace(1) %out, align 4
ret void
}
; CHECK-LABEL: @test_convert_fp64_to_fp16
; CHECK: cvt.rn.f16.f64
-define void @test_convert_fp64_to_fp16(i16 addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
- %val = load double, double addrspace(1)* %in, align 2
+define void @test_convert_fp64_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %val = load double, ptr addrspace(1) %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f64(double %val) nounwind readnone
- store i16 %cvt, i16 addrspace(1)* %out, align 4
+ store i16 %cvt, ptr addrspace(1) %out, align 4
ret void
}
;CHECK-SAME: !dbg [[FUNCNODE:![0-9]+]]
entry:
; References to the variables must be converted back to generic address space.
-; CHECK-DAG: addrspacecast ([4 x i8] addrspace(1)* @.str to [4 x i8]*)
- %0 = load i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), align 1
+; CHECK-DAG: addrspacecast (ptr addrspace(1) @.str to ptr)
+ %0 = load i8, ptr @.str, align 1
call void @extfunc(i8 signext %0)
-; CHECK-DAG: addrspacecast (i8 addrspace(1)* @static_var to i8*)
- %1 = load i8, i8* @static_var, align 1
+; CHECK-DAG: addrspacecast (ptr addrspace(1) @static_var to ptr)
+ %1 = load i8, ptr @static_var, align 1
call void @extfunc(i8 signext %1)
ret void
; CHECK: ret void
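;
; A minimal sketch of the conversion checked above (assumed names, not taken
; from this test, and left in comments so the module is unchanged): a
; module-level variable is moved into the global address space and each use
; goes through an addrspacecast back to the generic address space:
;
;   @gv = internal addrspace(1) global i8 0
;
;   define void @use_gv() {
;     %v = load i8, ptr addrspacecast (ptr addrspace(1) @gv to ptr), align 1
;     ret void
;   }
;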
@myconst = internal constant i32 420, align 4
-define void @foo(i32* %a, i32* %b) {
+define void @foo(ptr %a, ptr %b) {
; Expect one load -- @myconst isn't loaded from, because we know its value
; statically.
; CHECK: ld.global.u32
; CHECK: st.global.u32
; CHECK: st.global.u32
- %ld1 = load i32, i32* @myglobal
- %ld2 = load i32, i32* @myconst
- store i32 %ld1, i32* %a
- store i32 %ld2, i32* %b
+ %ld1 = load i32, ptr @myglobal
+ %ld2 = load i32, ptr @myconst
+ store i32 %ld1, ptr %a
+ store i32 %ld2, ptr %b
ret void
}
!nvvm.annotations = !{!0}
-!0 = !{void (i32*, i32*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}
; RUN: llc < %s -march=nvptx -mcpu=sm_20 2>&1
; Check that llc doesn't die when given an empty global ctor / dtor.
-@llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] []
-@llvm.global_dtors = appending global [0 x { i32, void ()*, i8* }] []
+@llvm.global_ctors = appending global [0 x { i32, ptr, ptr }] []
+@llvm.global_dtors = appending global [0 x { i32, ptr, ptr }] []
; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
; Check that llc dies when given a nonempty global ctor.
-@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }]
+@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]
; CHECK: ERROR: Module has a nontrivial global ctor
define internal void @foo() {
; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
; Check that llc dies when given a nonempty global dtor.
-@llvm.global_dtors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }]
+@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]
; CHECK: ERROR: Module has a nontrivial global dtor
define internal void @foo() {
; PTX32-NEXT: .visible .global .align 4 .u32 a2 = a;
; PTX64: .visible .global .align 1 .u8 a = 2;
; PTX64-NEXT: .visible .global .align 8 .u64 a2 = a;
-@a2 = addrspace(1) global i8 addrspace(1)* @a
+@a2 = addrspace(1) global ptr addrspace(1) @a
@a = addrspace(1) global i8 2
; PTX32-NEXT: .visible .global .align 4 .u32 b2[2] = {b, b};
; PTX64: .visible .global .align 1 .u8 b = 1;
; PTX64-NEXT: .visible .global .align 8 .u64 b2[2] = {b, b};
-@b2 = addrspace(1) global [2 x i8 addrspace(1)*] [i8 addrspace(1)* @b, i8 addrspace(1)* @b]
+@b2 = addrspace(1) global [2 x ptr addrspace(1)] [ptr addrspace(1) @b, ptr addrspace(1) @b]
@b = addrspace(1) global i8 1
define void @foo(float %f) {
entry:
; CHK: ld.shared.f32 %{{[a-zA-Z0-9]+}}, [Gbl+8];
- %0 = load float, float addrspace(3)* getelementptr inbounds ([1024 x %MyStruct], [1024 x %MyStruct] addrspace(3)* @Gbl, i32 0, i32 0, i32 2)
+ %0 = load float, ptr addrspace(3) getelementptr inbounds ([1024 x %MyStruct], ptr addrspace(3) @Gbl, i32 0, i32 0, i32 2)
%add = fadd float %0, %f
; CHK: st.shared.f32 [Gbl+8], %{{[a-zA-Z0-9]+}};
- store float %add, float addrspace(3)* getelementptr inbounds ([1024 x %MyStruct], [1024 x %MyStruct] addrspace(3)* @Gbl, i32 0, i32 0, i32 2)
+ store float %add, ptr addrspace(3) getelementptr inbounds ([1024 x %MyStruct], ptr addrspace(3) @Gbl, i32 0, i32 0, i32 2)
ret void
}
@"half_array" = addrspace(1) constant [4 x half]
[half 0xH0201, half 0xH0403, half 0xH0605, half 0xH0807]
-define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
+define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_load_store
; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
- %val = load half, half addrspace(1)* %in
- store half %val, half addrspace(1) * %out
+ %val = load half, ptr addrspace(1) %in
+ store half %val, ptr addrspace(1) %out
ret void
}
-define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) {
+define void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_bitcast_from_half
; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
- %val = load half, half addrspace(1) * %in
+ %val = load half, ptr addrspace(1) %in
%val_int = bitcast half %val to i16
- store i16 %val_int, i16 addrspace(1)* %out
+ store i16 %val_int, ptr addrspace(1) %out
ret void
}
-define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) {
+define void @test_bitcast_to_half(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; CHECK-LABEL: @test_bitcast_to_half
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load i16, i16 addrspace(1)* %in
+ %val = load i16, ptr addrspace(1) %in
%val_fp = bitcast i16 %val to half
- store half %val_fp, half addrspace(1)* %out
+ store half %val_fp, ptr addrspace(1) %out
ret void
}
-define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
+define void @test_extend32(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_extend32
; CHECK: cvt.f32.f16
- %val16 = load half, half addrspace(1)* %in
+ %val16 = load half, ptr addrspace(1) %in
%val32 = fpext half %val16 to float
- store float %val32, float addrspace(1)* %out
+ store float %val32, ptr addrspace(1) %out
ret void
}
-define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
+define void @test_extend64(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_extend64
; CHECK: cvt.f64.f16
- %val16 = load half, half addrspace(1)* %in
+ %val16 = load half, ptr addrspace(1) %in
%val64 = fpext half %val16 to double
- store double %val64, double addrspace(1)* %out
+ store double %val64, ptr addrspace(1) %out
ret void
}
-define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
+define void @test_trunc32(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: test_trunc32
; CHECK: cvt.rn.f16.f32
- %val32 = load float, float addrspace(1)* %in
+ %val32 = load float, ptr addrspace(1) %in
%val16 = fptrunc float %val32 to half
- store half %val16, half addrspace(1)* %out
+ store half %val16, ptr addrspace(1) %out
ret void
}
-define void @test_trunc64(double addrspace(1)* %in, half addrspace(1)* %out) {
+define void @test_trunc64(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @test_trunc64
; CHECK: cvt.rn.f16.f64
- %val32 = load double, double addrspace(1)* %in
+ %val32 = load double, ptr addrspace(1) %in
%val16 = fptrunc double %val32 to half
- store half %val16, half addrspace(1)* %out
+ store half %val16, ptr addrspace(1) %out
ret void
}
@mypred = addrspace(1) global i1 true, align 1
-define void @foo(i1 %p, i32* %out) {
- %ld = load i1, i1 addrspace(1)* @mypred
+define void @foo(i1 %p, ptr %out) {
+ %ld = load i1, ptr addrspace(1) @mypred
%val = zext i1 %ld to i32
- store i32 %val, i32* %out
+ store i32 %val, ptr %out
ret void
}
!nvvm.annotations = !{!0}
-!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}
; CHECK: .entry foo
; CHECK: .param .u8 foo_param_0
; CHECK: .param .u32 foo_param_1
-define void @foo(i1 %p, i32* %out) {
+define void @foo(i1 %p, ptr %out) {
%val = zext i1 %p to i32
- store i32 %val, i32* %out
+ store i32 %val, ptr %out
ret void
}
!nvvm.annotations = !{!0}
-!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}
; CHECK-LABEL: .visible .func callee(
; CHECK-NEXT: .param .align 16 .b8 callee_param_0[16],
; CHECK-NEXT: .param .align 16 .b8 callee_param_1[16],
-define void @callee(i128, i128, i128*) {
+define void @callee(i128, i128, ptr) {
; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0];
; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [callee_param_1];
; CHECK-NEXT: mul.lo.s64 %[[REG9:rd[0-9]+]], %[[REG0]], %[[REG2]];
%a = mul i128 %0, %1
- store i128 %a, i128* %2
+ store i128 %a, ptr %2
ret void
}
; CHECK-LABEL: .visible .entry caller_kernel(
; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_0[16],
; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_1[16],
-define ptx_kernel void @caller_kernel(i128, i128, i128*) {
+define ptx_kernel void @caller_kernel(i128, i128, ptr) {
start:
; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_kernel_param_0];
; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1];
; CHECK: .param .align 16 .b8 param1[16];
; CHECK-NEXT: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]}
; CHECK: } // callseq [[CALLSEQ_ID]]
- call void @callee(i128 %0, i128 %1, i128* %2)
+ call void @callee(i128 %0, i128 %1, ptr %2)
ret void
}
; CHECK-LABEL: .visible .func caller_func(
; CHECK-NEXT: .param .align 16 .b8 caller_func_param_0[16],
; CHECK-NEXT: .param .align 16 .b8 caller_func_param_1[16],
-define void @caller_func(i128, i128, i128*) {
+define void @caller_func(i128, i128, ptr) {
start:
; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_func_param_0]
; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1]
; CHECK: .param .align 16 .b8 param1[16];
; CHECK: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]}
; CHECK: } // callseq [[CALLSEQ_ID]]
- call void @callee(i128 %0, i128 %1, i128* %2)
+ call void @callee(i128 %0, i128 %1, ptr %2)
ret void
}
}
; CHECK-LABEL: .visible .func caller(
-define void @caller(i128, i128*) {
+define void @caller(i128, ptr) {
start:
; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_param_0];
; CHECK-DAG: ld.param.u64 %[[OUT:rd[0-9]+]], [caller_param_1];
; CHECK-DAG: st.u64 [%[[OUT]]], %[[REG2]];
; CHECK-DAG: st.u64 [%[[OUT]]+8], %[[REG3]];
- store i128 %a, i128* %1
+ store i128 %a, ptr %1
ret void
}
}

; CHECK: .visible .func caller
-define void @caller(i8* %a) {
+define void @caller(ptr %a) {
; CHECK: ld.u8
- %val = load i8, i8* %a
+ %val = load i8, ptr %a
%ret = tail call i8 @callee(i8 %val)
; CHECK: ld.param.b32
- store i8 %ret, i8* %a
+ store i8 %ret, ptr %a
ret void
}

; check: //TEST baz
;@baz = internal global i32 0, align 4
;define dso_local i32 @test_inlineasm_c_output_template1() {
-; tail call void asm sideeffect "//TEST ${0:c}", "i"(i32* nonnull @baz)
+; tail call void asm sideeffect "//TEST ${0:c}", "i"(ptr nonnull @baz)
; ret i32 42
;}
; optimizations (such as the store below being eliminated as dead code). This
; test makes sure we don't regress.
-declare void @foo(i32 addrspace(1)*)
+declare void @foo(ptr addrspace(1))
-declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*)
+declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
; CHECK: @bar
define void @bar() {
%t1 = alloca i32
-; CHECK: call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* nonnull %t1)
-; CHECK-NEXT: store i32 10, i32* %t1
- %t2 = call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1)
- store i32 10, i32* %t1
- call void @foo(i32 addrspace(1)* %t2)
+; CHECK: call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr nonnull %t1)
+; CHECK-NEXT: store i32 10, ptr %t1
+ %t2 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %t1)
+ store i32 10, ptr %t1
+ call void @foo(ptr addrspace(1) %t2)
ret void
}
; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
; then converting back to i16.
; CHECK-LABEL: test_popc16
-define void @test_popc16(i16 %a, i16* %b) {
+define void @test_popc16(i16 %a, ptr %b) {
; CHECK: cvt.u32.u16
; CHECK: popc.b32
; CHECK: cvt.u16.u32
%val = call i16 @llvm.ctpop.i16(i16 %a)
- store i16 %val, i16* %b
+ store i16 %val, ptr %b
ret void
}
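; A rough IR-level sketch of the expansion described above (illustrative only;
; the value names are hypothetical, and the lowering actually happens during
; instruction selection rather than as an IR rewrite):
;   %wide = zext i16 %a to i32
;   %cnt  = call i32 @llvm.ctpop.i32(i32 %wide)
;   %res  = trunc i32 %cnt to i16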
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
-declare i1 @llvm.nvvm.isspacep.const(i8*) readnone noinline
-declare i1 @llvm.nvvm.isspacep.global(i8*) readnone noinline
-declare i1 @llvm.nvvm.isspacep.local(i8*) readnone noinline
-declare i1 @llvm.nvvm.isspacep.shared(i8*) readnone noinline
+declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
; CHECK: is_const
-define i1 @is_const(i8* %addr) {
+define i1 @is_const(ptr %addr) {
; CHECK: isspacep.const
- %v = tail call i1 @llvm.nvvm.isspacep.const(i8* %addr)
+ %v = tail call i1 @llvm.nvvm.isspacep.const(ptr %addr)
ret i1 %v
}
; CHECK: is_global
-define i1 @is_global(i8* %addr) {
+define i1 @is_global(ptr %addr) {
; CHECK: isspacep.global
- %v = tail call i1 @llvm.nvvm.isspacep.global(i8* %addr)
+ %v = tail call i1 @llvm.nvvm.isspacep.global(ptr %addr)
ret i1 %v
}
; CHECK: is_local
-define i1 @is_local(i8* %addr) {
+define i1 @is_local(ptr %addr) {
; CHECK: isspacep.local
- %v = tail call i1 @llvm.nvvm.isspacep.local(i8* %addr)
+ %v = tail call i1 @llvm.nvvm.isspacep.local(ptr %addr)
ret i1 %v
}
; CHECK: is_shared
-define i1 @is_shared(i8* %addr) {
+define i1 @is_shared(ptr %addr) {
; CHECK: isspacep.shared
- %v = tail call i1 @llvm.nvvm.isspacep.shared(i8* %addr)
+ %v = tail call i1 @llvm.nvvm.isspacep.shared(ptr %addr)
ret i1 %v
}
;; i8
-define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
+define i8 @ld_global_i8(ptr addrspace(0) %ptr) {
; PTX32: ld.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8, i8 addrspace(0)* %ptr
+ %a = load i8, ptr addrspace(0) %ptr
ret i8 %a
}
;; i16
-define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
+define i16 @ld_global_i16(ptr addrspace(0) %ptr) {
; PTX32: ld.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16, i16 addrspace(0)* %ptr
+ %a = load i16, ptr addrspace(0) %ptr
ret i16 %a
}
;; i32
-define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
+define i32 @ld_global_i32(ptr addrspace(0) %ptr) {
; PTX32: ld.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32, i32 addrspace(0)* %ptr
+ %a = load i32, ptr addrspace(0) %ptr
ret i32 %a
}
;; i64
-define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
+define i64 @ld_global_i64(ptr addrspace(0) %ptr) {
; PTX32: ld.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64, i64 addrspace(0)* %ptr
+ %a = load i64, ptr addrspace(0) %ptr
ret i64 %a
}
;; f32
-define float @ld_global_f32(float addrspace(0)* %ptr) {
+define float @ld_global_f32(ptr addrspace(0) %ptr) {
; PTX32: ld.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float, float addrspace(0)* %ptr
+ %a = load float, ptr addrspace(0) %ptr
ret float %a
}
;; f64
-define double @ld_global_f64(double addrspace(0)* %ptr) {
+define double @ld_global_f64(ptr addrspace(0) %ptr) {
; PTX32: ld.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double, double addrspace(0)* %ptr
+ %a = load double, ptr addrspace(0) %ptr
ret double %a
}
; ld.global.nc.
; CHECK-LABEL: @ld_global
-define i32 @ld_global(i32 addrspace(1)* %ptr) {
+define i32 @ld_global(ptr addrspace(1) %ptr) {
; CHECK: ld.global.nc.{{[a-z]}}32
- %a = load i32, i32 addrspace(1)* %ptr, !invariant.load !0
+ %a = load i32, ptr addrspace(1) %ptr, !invariant.load !0
ret i32 %a
}
; CHECK-LABEL: @ld_global_v2f16
-define half @ld_global_v2f16(<2 x half> addrspace(1)* %ptr) {
+define half @ld_global_v2f16(ptr addrspace(1) %ptr) {
; A load of v2f16 is unusual: we consider it a legal type, but it happens to be
; loaded/stored as a single 32-bit scalar.
; CHECK: ld.global.nc.b32
- %a = load <2 x half>, <2 x half> addrspace(1)* %ptr, !invariant.load !0
+ %a = load <2 x half>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <2 x half> %a, i32 0
%v2 = extractelement <2 x half> %a, i32 1
%sum = fadd half %v1, %v2
ret half %sum
}
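; Rough picture of what "loaded/stored as a single 32-bit scalar" means, as an
; IR-level sketch (illustrative only; the actual selection happens in the backend):
;   %bits = load i32, ptr addrspace(1) %ptr, !invariant.load !0
;   %vec  = bitcast i32 %bits to <2 x half>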
; CHECK-LABEL: @ld_global_v4f16
-define half @ld_global_v4f16(<4 x half> addrspace(1)* %ptr) {
+define half @ld_global_v4f16(ptr addrspace(1) %ptr) {
; Larger f16 vectors may be split into individual f16 elements, and multiple
; loads/stores may be vectorized using the f16 element type. In practice this
; is limited to the v4 variant only.
; CHECK: ld.global.nc.v4.b16
- %a = load <4 x half>, <4 x half> addrspace(1)* %ptr, !invariant.load !0
+ %a = load <4 x half>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <4 x half> %a, i32 0
%v2 = extractelement <4 x half> %a, i32 1
%v3 = extractelement <4 x half> %a, i32 2
}
; CHECK-LABEL: @ld_global_v8f16
-define half @ld_global_v8f16(<8 x half> addrspace(1)* %ptr) {
+define half @ld_global_v8f16(ptr addrspace(1) %ptr) {
; Larger vectors are, again, loaded as v4i32. PTX has no v8 variants of loads/stores,
; so the load/store vectorizer has to convert v8f16 -> v4 x v2f16.
; CHECK: ld.global.nc.v4.b32
- %a = load <8 x half>, <8 x half> addrspace(1)* %ptr, !invariant.load !0
+ %a = load <8 x half>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <8 x half> %a, i32 0
%v2 = extractelement <8 x half> %a, i32 2
%v3 = extractelement <8 x half> %a, i32 4
}
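; Conceptually, the whole v8f16 access is serviced by one v4.b32 transaction,
; roughly as follows (a sketch, not the vectorizer's literal output):
;   %raw = load <4 x i32>, ptr addrspace(1) %ptr, !invariant.load !0
;   %vec = bitcast <4 x i32> %raw to <8 x half>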
; CHECK-LABEL: @ld_global_v2i32
-define i32 @ld_global_v2i32(<2 x i32> addrspace(1)* %ptr) {
+define i32 @ld_global_v2i32(ptr addrspace(1) %ptr) {
; CHECK: ld.global.nc.v2.{{[a-z]}}32
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %ptr, !invariant.load !0
+ %a = load <2 x i32>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <2 x i32> %a, i32 0
%v2 = extractelement <2 x i32> %a, i32 1
%sum = add i32 %v1, %v2
ret i32 %sum
}
; CHECK-LABEL: @ld_global_v4i32
-define i32 @ld_global_v4i32(<4 x i32> addrspace(1)* %ptr) {
+define i32 @ld_global_v4i32(ptr addrspace(1) %ptr) {
; CHECK: ld.global.nc.v4.{{[a-z]}}32
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %ptr, !invariant.load !0
+ %a = load <4 x i32>, ptr addrspace(1) %ptr, !invariant.load !0
%v1 = extractelement <4 x i32> %a, i32 0
%v2 = extractelement <4 x i32> %a, i32 1
%v3 = extractelement <4 x i32> %a, i32 2
}
; CHECK-LABEL: @ld_not_invariant
-define i32 @ld_not_invariant(i32 addrspace(1)* %ptr) {
+define i32 @ld_not_invariant(ptr addrspace(1) %ptr) {
; CHECK: ld.global.{{[a-z]}}32
- %a = load i32, i32 addrspace(1)* %ptr
+ %a = load i32, ptr addrspace(1) %ptr
ret i32 %a
}
; CHECK-LABEL: @ld_not_global_addrspace
-define i32 @ld_not_global_addrspace(i32 addrspace(0)* %ptr) {
+define i32 @ld_not_global_addrspace(ptr addrspace(0) %ptr) {
; CHECK: ld.{{[a-z]}}32
- %a = load i32, i32 addrspace(0)* %ptr
+ %a = load i32, ptr addrspace(0) %ptr
ret i32 %a
}
declare <4 x float> @bar()
; CHECK-LABEL: .func foo(
-define void @foo(<4 x float>* %ptr) {
+define void @foo(ptr %ptr) {
; CHECK: ld.param.u32 %[[PTR:r[0-9]+]], [foo_param_0];
; CHECK: ld.param.v4.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]], [[E2:%f[0-9]+]], [[E3:%f[0-9]+]]}, [retval0+0];
; CHECK: st.v4.f32 [%[[PTR]]], {[[E0]], [[E1]], [[E2]], [[E3]]}
%val = tail call <4 x float> @bar()
- store <4 x float> %val, <4 x float>* %ptr
+ store <4 x float> %val, ptr %ptr
ret void
}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*, i32)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr, i32)
-define i8 @foo(i8* %a) {
+define i8 @foo(ptr %a) {
; Ensure we properly truncate off the high-order 24 bits
; CHECK: ldu.global.u8
; CHECK: cvt.u32.u16
; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255
- %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a, i32 4)
+ %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr %a, i32 4)
ret i8 %val
}
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_32%} %}
-declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
-declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
-declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
-declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
; CHECK: func0
-define i8 @func0(i8 addrspace(1)* %ptr) {
+define i8 @func0(ptr addrspace(1) %ptr) {
; ldu.global.u8
- %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
+ %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
ret i8 %val
}
; CHECK: func1
-define i32 @func1(i32 addrspace(1)* %ptr) {
+define i32 @func1(ptr addrspace(1) %ptr) {
; ldu.global.u32
- %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
+ %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
ret i32 %val
}
; CHECK: func2
-define i8 @func2(i8 addrspace(1)* %ptr) {
+define i8 @func2(ptr addrspace(1) %ptr) {
; ld.global.nc.u8
- %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
+ %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
ret i8 %val
}
; CHECK: func3
-define i32 @func3(i32 addrspace(1)* %ptr) {
+define i32 @func3(ptr addrspace(1) %ptr) {
; ld.global.nc.u32
- %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
+ %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
ret i32 %val
}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-define void @reg_plus_offset(i32* %a) {
+define void @reg_plus_offset(ptr %a) {
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
- %p2 = getelementptr i32, i32* %a, i32 8
- %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4)
- %p3 = getelementptr i32, i32* %a, i32 9
- %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4)
+ %p2 = getelementptr i32, ptr %a, i32 8
+ %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr %p2, i32 4)
+ %p3 = getelementptr i32, ptr %a, i32 9
+ %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr %p3, i32 4)
%t3 = mul i32 %t1, %t2
- store i32 %t3, i32* %a
+ store i32 %t3, ptr %a
ret void
}
-declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*, i32)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr, i32)
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; Allow making libcalls that are defined in the current module
-declare i8* @malloc(i64)
-declare void @free(i8*)
+declare ptr @malloc(i64)
+declare void @free(ptr)
; Underlying libcall declaration
; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3
; CHECK: malloc,
; CHECK: call.uni
; CHECK: free,
- %a = call i8* @malloc(i64 4)
- store i8 0, i8* %a
- call void @free(i8* %a)
+ %a = call ptr @malloc(i64 4)
+ store i8 0, ptr %a
+ call void @free(ptr %a)
ret void
}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx-nvidia-cuda"
-define void @main(i1* %a1, i32 %a2, i32* %arg3) {
+define void @main(ptr %a1, i32 %a2, ptr %arg3) {
; CHECK: ld.u8
; CHECK-NOT: ld.u1
- %t1 = getelementptr i1, i1* %a1, i32 %a2
- %t2 = load i1, i1* %t1
+ %t1 = getelementptr i1, ptr %a1, i32 %a2
+ %t2 = load i1, ptr %t1
%t3 = sext i1 %t2 to i32
- store i32 %t3, i32* %arg3
+ store i32 %t3, ptr %arg3
ret void
}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; CHECK-LABEL: plain
-define void @plain(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr {
+define void @plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load i8, i8* %a
+ %a.load = load i8, ptr %a
%a.add = add i8 %a.load, 1
; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i8 %a.add, i8* %a
+ store i8 %a.add, ptr %a
; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load i16, i16* %b
+ %b.load = load i16, ptr %b
%b.add = add i16 %b.load, 1
; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i16 %b.add, i16* %b
+ store i16 %b.add, ptr %b
; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load i32, i32* %c
+ %c.load = load i32, ptr %c
%c.add = add i32 %c.load, 1
; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store i32 %c.add, i32* %c
+ store i32 %c.add, ptr %c
; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load i64, i64* %d
+ %d.load = load i64, ptr %d
%d.add = add i64 %d.load, 1
; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store i64 %d.add, i64* %d
+ store i64 %d.add, ptr %d
ret void
}
; CHECK-LABEL: volatile
-define void @volatile(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr {
+define void @volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load volatile i8, i8* %a
+ %a.load = load volatile i8, ptr %a
%a.add = add i8 %a.load, 1
; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i8 %a.add, i8* %a
+ store volatile i8 %a.add, ptr %a
; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load volatile i16, i16* %b
+ %b.load = load volatile i16, ptr %b
%b.add = add i16 %b.load, 1
; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i16 %b.add, i16* %b
+ store volatile i16 %b.add, ptr %b
; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load volatile i32, i32* %c
+ %c.load = load volatile i32, ptr %c
%c.add = add i32 %c.load, 1
; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store volatile i32 %c.add, i32* %c
+ store volatile i32 %c.add, ptr %c
; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load volatile i64, i64* %d
+ %d.load = load volatile i64, ptr %d
%d.add = add i64 %d.load, 1
; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store volatile i64 %d.add, i64* %d
+ store volatile i64 %d.add, ptr %d
ret void
}
; CHECK-LABEL: monotonic
-define void @monotonic(i8* %a, i16* %b, i32* %c, i64* %d, float* %e) local_unnamed_addr {
+define void @monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, i8* %a monotonic, align 1
+ %a.load = load atomic i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, i8* %a monotonic, align 1
+ store atomic i8 %a.add, ptr %a monotonic, align 1
; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, i16* %b monotonic, align 2
+ %b.load = load atomic i16, ptr %b monotonic, align 2
%b.add = add i16 %b.load, 1
; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, i16* %b monotonic, align 2
+ store atomic i16 %b.add, ptr %b monotonic, align 2
; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, i32* %c monotonic, align 4
+ %c.load = load atomic i32, ptr %c monotonic, align 4
%c.add = add i32 %c.load, 1
; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, i32* %c monotonic, align 4
+ store atomic i32 %c.add, ptr %c monotonic, align 4
; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, i64* %d monotonic, align 8
+ %d.load = load atomic i64, ptr %d monotonic, align 8
%d.add = add i64 %d.load, 1
; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, i64* %d monotonic, align 8
+ store atomic i64 %d.add, ptr %d monotonic, align 8
; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, float* %e monotonic, align 4
+ %e.load = load atomic float, ptr %e monotonic, align 4
%e.add = fadd float %e.load, 1.0
; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, float* %e monotonic, align 4
+ store atomic float %e.add, ptr %e monotonic, align 4
ret void
}
; SM20: ld.global.f32
; SM35-LABEL: .visible .entry foo1(
; SM35: ld.global.nc.f32
-define void @foo1(float * noalias readonly %from, float * %to) {
- %1 = load float, float * %from
- store float %1, float * %to
+define void @foo1(ptr noalias readonly %from, ptr %to) {
+ %1 = load float, ptr %from
+ store float %1, ptr %to
ret void
}
; SM20: ld.global.f64
; SM35-LABEL: .visible .entry foo2(
; SM35: ld.global.nc.f64
-define void @foo2(double * noalias readonly %from, double * %to) {
- %1 = load double, double * %from
- store double %1, double * %to
+define void @foo2(ptr noalias readonly %from, ptr %to) {
+ %1 = load double, ptr %from
+ store double %1, ptr %to
ret void
}
; SM20: ld.global.u16
; SM35-LABEL: .visible .entry foo3(
; SM35: ld.global.nc.u16
-define void @foo3(i16 * noalias readonly %from, i16 * %to) {
- %1 = load i16, i16 * %from
- store i16 %1, i16 * %to
+define void @foo3(ptr noalias readonly %from, ptr %to) {
+ %1 = load i16, ptr %from
+ store i16 %1, ptr %to
ret void
}
; SM20: ld.global.u32
; SM35-LABEL: .visible .entry foo4(
; SM35: ld.global.nc.u32
-define void @foo4(i32 * noalias readonly %from, i32 * %to) {
- %1 = load i32, i32 * %from
- store i32 %1, i32 * %to
+define void @foo4(ptr noalias readonly %from, ptr %to) {
+ %1 = load i32, ptr %from
+ store i32 %1, ptr %to
ret void
}
; SM20: ld.global.u64
; SM35-LABEL: .visible .entry foo5(
; SM35: ld.global.nc.u64
-define void @foo5(i64 * noalias readonly %from, i64 * %to) {
- %1 = load i64, i64 * %from
- store i64 %1, i64 * %to
+define void @foo5(ptr noalias readonly %from, ptr %to) {
+ %1 = load i64, ptr %from
+ store i64 %1, ptr %to
ret void
}
; SM35-LABEL: .visible .entry foo6(
; SM35: ld.global.nc.u64
; SM35: ld.global.nc.u64
-define void @foo6(i128 * noalias readonly %from, i128 * %to) {
- %1 = load i128, i128 * %from
- store i128 %1, i128 * %to
+define void @foo6(ptr noalias readonly %from, ptr %to) {
+ %1 = load i128, ptr %from
+ store i128 %1, ptr %to
ret void
}
; SM20: ld.global.v2.u8
; SM35-LABEL: .visible .entry foo7(
; SM35: ld.global.nc.v2.u8
-define void @foo7(<2 x i8> * noalias readonly %from, <2 x i8> * %to) {
- %1 = load <2 x i8>, <2 x i8> * %from
- store <2 x i8> %1, <2 x i8> * %to
+define void @foo7(ptr noalias readonly %from, ptr %to) {
+ %1 = load <2 x i8>, ptr %from
+ store <2 x i8> %1, ptr %to
ret void
}
; SM20: ld.global.v2.u16
; SM35-LABEL: .visible .entry foo8(
; SM35: ld.global.nc.v2.u16
-define void @foo8(<2 x i16> * noalias readonly %from, <2 x i16> * %to) {
- %1 = load <2 x i16>, <2 x i16> * %from
- store <2 x i16> %1, <2 x i16> * %to
+define void @foo8(ptr noalias readonly %from, ptr %to) {
+ %1 = load <2 x i16>, ptr %from
+ store <2 x i16> %1, ptr %to
ret void
}
; SM20: ld.global.v2.u32
; SM35-LABEL: .visible .entry foo9(
; SM35: ld.global.nc.v2.u32
-define void @foo9(<2 x i32> * noalias readonly %from, <2 x i32> * %to) {
- %1 = load <2 x i32>, <2 x i32> * %from
- store <2 x i32> %1, <2 x i32> * %to
+define void @foo9(ptr noalias readonly %from, ptr %to) {
+ %1 = load <2 x i32>, ptr %from
+ store <2 x i32> %1, ptr %to
ret void
}
; SM20: ld.global.v2.u64
; SM35-LABEL: .visible .entry foo10(
; SM35: ld.global.nc.v2.u64
-define void @foo10(<2 x i64> * noalias readonly %from, <2 x i64> * %to) {
- %1 = load <2 x i64>, <2 x i64> * %from
- store <2 x i64> %1, <2 x i64> * %to
+define void @foo10(ptr noalias readonly %from, ptr %to) {
+ %1 = load <2 x i64>, ptr %from
+ store <2 x i64> %1, ptr %to
ret void
}
; SM20: ld.global.v2.f32
; SM35-LABEL: .visible .entry foo11(
; SM35: ld.global.nc.v2.f32
-define void @foo11(<2 x float> * noalias readonly %from, <2 x float> * %to) {
- %1 = load <2 x float>, <2 x float> * %from
- store <2 x float> %1, <2 x float> * %to
+define void @foo11(ptr noalias readonly %from, ptr %to) {
+ %1 = load <2 x float>, ptr %from
+ store <2 x float> %1, ptr %to
ret void
}
; SM20: ld.global.v2.f64
; SM35-LABEL: .visible .entry foo12(
; SM35: ld.global.nc.v2.f64
-define void @foo12(<2 x double> * noalias readonly %from, <2 x double> * %to) {
- %1 = load <2 x double>, <2 x double> * %from
- store <2 x double> %1, <2 x double> * %to
+define void @foo12(ptr noalias readonly %from, ptr %to) {
+ %1 = load <2 x double>, ptr %from
+ store <2 x double> %1, ptr %to
ret void
}
; SM20: ld.global.v4.u8
; SM35-LABEL: .visible .entry foo13(
; SM35: ld.global.nc.v4.u8
-define void @foo13(<4 x i8> * noalias readonly %from, <4 x i8> * %to) {
- %1 = load <4 x i8>, <4 x i8> * %from
- store <4 x i8> %1, <4 x i8> * %to
+define void @foo13(ptr noalias readonly %from, ptr %to) {
+ %1 = load <4 x i8>, ptr %from
+ store <4 x i8> %1, ptr %to
ret void
}
; SM20: ld.global.v4.u16
; SM35-LABEL: .visible .entry foo14(
; SM35: ld.global.nc.v4.u16
-define void @foo14(<4 x i16> * noalias readonly %from, <4 x i16> * %to) {
- %1 = load <4 x i16>, <4 x i16> * %from
- store <4 x i16> %1, <4 x i16> * %to
+define void @foo14(ptr noalias readonly %from, ptr %to) {
+ %1 = load <4 x i16>, ptr %from
+ store <4 x i16> %1, ptr %to
ret void
}
; SM20: ld.global.v4.u32
; SM35-LABEL: .visible .entry foo15(
; SM35: ld.global.nc.v4.u32
-define void @foo15(<4 x i32> * noalias readonly %from, <4 x i32> * %to) {
- %1 = load <4 x i32>, <4 x i32> * %from
- store <4 x i32> %1, <4 x i32> * %to
+define void @foo15(ptr noalias readonly %from, ptr %to) {
+ %1 = load <4 x i32>, ptr %from
+ store <4 x i32> %1, ptr %to
ret void
}
; SM20: ld.global.v4.f32
; SM35-LABEL: .visible .entry foo16(
; SM35: ld.global.nc.v4.f32
-define void @foo16(<4 x float> * noalias readonly %from, <4 x float> * %to) {
- %1 = load <4 x float>, <4 x float> * %from
- store <4 x float> %1, <4 x float> * %to
+define void @foo16(ptr noalias readonly %from, ptr %to) {
+ %1 = load <4 x float>, ptr %from
+ store <4 x float> %1, ptr %to
ret void
}
; SM35-LABEL: .visible .entry foo17(
; SM35: ld.global.nc.v2.f64
; SM35: ld.global.nc.v2.f64
-define void @foo17(<4 x double> * noalias readonly %from, <4 x double> * %to) {
- %1 = load <4 x double>, <4 x double> * %from
- store <4 x double> %1, <4 x double> * %to
+define void @foo17(ptr noalias readonly %from, ptr %to) {
+ %1 = load <4 x double>, ptr %from
+ store <4 x double> %1, ptr %to
ret void
}
; SM20: ld.global.u64
; SM35-LABEL: .visible .entry foo18(
; SM35: ld.global.nc.u64
-define void @foo18(float ** noalias readonly %from, float ** %to) {
- %1 = load float *, float ** %from
- store float * %1, float ** %to
+define void @foo18(ptr noalias readonly %from, ptr %to) {
+ %1 = load ptr, ptr %from
+ store ptr %1, ptr %to
ret void
}
; SM20: ld.global.f32
; SM35-LABEL: .visible .entry foo19(
; SM35: ld.global.nc.f32
-define void @foo19(float * noalias readonly %from, float * %to, i32 %n) {
+define void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) {
entry:
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %nexti, %loop ]
%sum = phi float [ 0.0, %entry ], [ %nextsum, %loop ]
- %ptr = getelementptr inbounds float, float * %from, i32 %i
- %value = load float, float * %ptr, align 4
+ %ptr = getelementptr inbounds float, ptr %from, i32 %i
+ %value = load float, ptr %ptr, align 4
%nextsum = fadd float %value, %sum
%nexti = add nsw i32 %i, 1
%exitcond = icmp eq i32 %nexti, %n
br i1 %exitcond, label %exit, label %loop
exit:
- store float %nextsum, float * %to
+ store float %nextsum, ptr %to
ret void
}
; SM20: ld.f32
; SM35-LABEL: notkernel(
; SM35: ld.f32
-define void @notkernel(float * noalias readonly %from, float * %to) {
- %1 = load float, float * %from
- store float %1, float * %to
+define void @notkernel(ptr noalias readonly %from, ptr %to) {
+ %1 = load float, ptr %from
+ store float %1, ptr %to
ret void
}
; SM20: ld.global.f32
; SM35-LABEL: notkernel2(
; SM35: ld.global.f32
-define void @notkernel2(float addrspace(1) * noalias readonly %from, float * %to) {
- %1 = load float, float addrspace(1) * %from
- store float %1, float * %to
+define void @notkernel2(ptr addrspace(1) noalias readonly %from, ptr %to) {
+ %1 = load float, ptr addrspace(1) %from
+ store float %1, ptr %to
ret void
}
!nvvm.annotations = !{!1 ,!2 ,!3 ,!4 ,!5 ,!6, !7 ,!8 ,!9 ,!10 ,!11 ,!12, !13, !14, !15, !16, !17, !18, !19}
-!1 = !{void (float *, float *)* @foo1, !"kernel", i32 1}
-!2 = !{void (double *, double *)* @foo2, !"kernel", i32 1}
-!3 = !{void (i16 *, i16 *)* @foo3, !"kernel", i32 1}
-!4 = !{void (i32 *, i32 *)* @foo4, !"kernel", i32 1}
-!5 = !{void (i64 *, i64 *)* @foo5, !"kernel", i32 1}
-!6 = !{void (i128 *, i128 *)* @foo6, !"kernel", i32 1}
-!7 = !{void (<2 x i8> *, <2 x i8> *)* @foo7, !"kernel", i32 1}
-!8 = !{void (<2 x i16> *, <2 x i16> *)* @foo8, !"kernel", i32 1}
-!9 = !{void (<2 x i32> *, <2 x i32> *)* @foo9, !"kernel", i32 1}
-!10 = !{void (<2 x i64> *, <2 x i64> *)* @foo10, !"kernel", i32 1}
-!11 = !{void (<2 x float> *, <2 x float> *)* @foo11, !"kernel", i32 1}
-!12 = !{void (<2 x double> *, <2 x double> *)* @foo12, !"kernel", i32 1}
-!13 = !{void (<4 x i8> *, <4 x i8> *)* @foo13, !"kernel", i32 1}
-!14 = !{void (<4 x i16> *, <4 x i16> *)* @foo14, !"kernel", i32 1}
-!15 = !{void (<4 x i32> *, <4 x i32> *)* @foo15, !"kernel", i32 1}
-!16 = !{void (<4 x float> *, <4 x float> *)* @foo16, !"kernel", i32 1}
-!17 = !{void (<4 x double> *, <4 x double> *)* @foo17, !"kernel", i32 1}
-!18 = !{void (float **, float **)* @foo18, !"kernel", i32 1}
-!19 = !{void (float *, float *, i32)* @foo19, !"kernel", i32 1}
+!1 = !{ptr @foo1, !"kernel", i32 1}
+!2 = !{ptr @foo2, !"kernel", i32 1}
+!3 = !{ptr @foo3, !"kernel", i32 1}
+!4 = !{ptr @foo4, !"kernel", i32 1}
+!5 = !{ptr @foo5, !"kernel", i32 1}
+!6 = !{ptr @foo6, !"kernel", i32 1}
+!7 = !{ptr @foo7, !"kernel", i32 1}
+!8 = !{ptr @foo8, !"kernel", i32 1}
+!9 = !{ptr @foo9, !"kernel", i32 1}
+!10 = !{ptr @foo10, !"kernel", i32 1}
+!11 = !{ptr @foo11, !"kernel", i32 1}
+!12 = !{ptr @foo12, !"kernel", i32 1}
+!13 = !{ptr @foo13, !"kernel", i32 1}
+!14 = !{ptr @foo14, !"kernel", i32 1}
+!15 = !{ptr @foo15, !"kernel", i32 1}
+!16 = !{ptr @foo16, !"kernel", i32 1}
+!17 = !{ptr @foo17, !"kernel", i32 1}
+!18 = !{ptr @foo18, !"kernel", i32 1}
+!19 = !{ptr @foo19, !"kernel", i32 1}
; PTX64: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
define void @foo(i32 %a) {
%local = alloca i32, align 4
- store volatile i32 %a, i32* %local
+ store volatile i32 %a, ptr %local
ret void
}
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
define void @foo2(i32 %a) {
%local = alloca i32, align 4
- store i32 %a, i32* %local
- call void @bar(i32* %local)
+ store i32 %a, ptr %local
+ call void @bar(ptr %local)
ret void
}
-declare void @bar(i32* %a)
+declare void @bar(ptr %a)
!nvvm.annotations = !{!0}
-!0 = !{void (i32)* @foo2, !"kernel", i32 1}
+!0 = !{ptr @foo2, !"kernel", i32 1}
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32-NOT: cvta.local.u32 %SP, %SPL;
; PTX64: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}};
define void @foo3(i32 %a) {
%local = alloca [3 x i32], align 4
- %1 = bitcast [3 x i32]* %local to i32*
- %2 = getelementptr inbounds i32, i32* %1, i32 %a
- store i32 %a, i32* %2
+ %1 = getelementptr inbounds i32, ptr %local, i32 %a
+ store i32 %a, ptr %1
ret void
}
define void @foo4() {
%A = alloca i32
%B = alloca i32
- store i32 0, i32* %A
- store i32 0, i32* %B
- call void @bar(i32* %A)
- call void @bar(i32* %B)
+ store i32 0, ptr %A
+ store i32 0, ptr %B
+ call void @bar(ptr %A)
+ call void @bar(ptr %B)
ret void
}
attributes #0 = { nounwind }
!nvvm.annotations = !{!0}
-!0 = !{void (i32, i32, i32)* @no_vectorization, !"kernel", i32 1}
+!0 = !{ptr @no_vectorization, !"kernel", i32 1}
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "nvptx64-unknown-unknown"
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) #1
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) #1
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) #1
-define i8* @memcpy_caller(i8* %dst, i8* %src, i64 %n) #0 {
+define ptr @memcpy_caller(ptr %dst, ptr %src, i64 %n) #0 {
entry:
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
- ret i8* %dst
+ tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
+ ret ptr %dst
; IR-LABEL: @memcpy_caller
; IR: entry:
; IR: loop-memcpy-expansion:
; IR: %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %loop-memcpy-expansion ]
-; IR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
-; IR: [[Load:%[0-9]+]] = load i8, i8* [[SrcGep]]
-; IR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
-; IR: store i8 [[Load]], i8* [[DstGep]]
+; IR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, ptr %src, i64 %loop-index
+; IR: [[Load:%[0-9]+]] = load i8, ptr [[SrcGep]]
+; IR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64 %loop-index
+; IR: store i8 [[Load]], ptr [[DstGep]]
; IR: [[IndexInc]] = add i64 %loop-index, 1
; IR: [[Cond2:%[0-9]+]] = icmp ult i64 [[IndexInc]], %n
; IR: br i1 [[Cond2]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
; IR-LABEL: post-loop-memcpy-expansion:
-; IR: ret i8* %dst
+; IR: ret ptr %dst
; PTX-LABEL: .visible .func (.param .b64 func_retval0) memcpy_caller
; PTX: $L__BB[[LABEL:[_0-9]+]]:
}
-define i8* @memcpy_volatile_caller(i8* %dst, i8* %src, i64 %n) #0 {
+define ptr @memcpy_volatile_caller(ptr %dst, ptr %src, i64 %n) #0 {
entry:
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 true)
- ret i8* %dst
+ tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 true)
+ ret ptr %dst
; IR-LABEL: @memcpy_volatile_caller
; IR: entry:
; IR: loop-memcpy-expansion:
; IR: %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %loop-memcpy-expansion ]
-; IR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
-; IR: [[Load:%[0-9]+]] = load volatile i8, i8* [[SrcGep]]
-; IR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
-; IR: store volatile i8 [[Load]], i8* [[DstGep]]
+; IR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, ptr %src, i64 %loop-index
+; IR: [[Load:%[0-9]+]] = load volatile i8, ptr [[SrcGep]]
+; IR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64 %loop-index
+; IR: store volatile i8 [[Load]], ptr [[DstGep]]
; IR: [[IndexInc]] = add i64 %loop-index, 1
; IR: [[Cond2:%[0-9]+]] = icmp ult i64 [[IndexInc]], %n
; IR: br i1 [[Cond2]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
; IR-LABEL: post-loop-memcpy-expansion:
-; IR: ret i8* %dst
+; IR: ret ptr %dst
; PTX-LABEL: .visible .func (.param .b64 func_retval0) memcpy_volatile_caller
; PTX: @%p[[PRED]] bra $L__BB[[LABEL]]
}
-define i8* @memcpy_casting_caller(i32* %dst, i32* %src, i64 %n) #0 {
+define ptr @memcpy_casting_caller(ptr %dst, ptr %src, i64 %n) #0 {
entry:
- %0 = bitcast i32* %dst to i8*
- %1 = bitcast i32* %src to i8*
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 %n, i1 false)
- ret i8* %0
+ tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
+ ret ptr %dst
; Check that casts in calls to memcpy are handled properly
; IR-LABEL: @memcpy_casting_caller
-; IR: [[DSTCAST:%[0-9]+]] = bitcast i32* %dst to i8*
-; IR: [[SRCCAST:%[0-9]+]] = bitcast i32* %src to i8*
-; IR: getelementptr inbounds i8, i8* [[SRCCAST]]
-; IR: getelementptr inbounds i8, i8* [[DSTCAST]]
+; IR: getelementptr inbounds i8, ptr %src
+; IR: getelementptr inbounds i8, ptr %dst
}
-define i8* @memcpy_known_size(i8* %dst, i8* %src) {
+define ptr @memcpy_known_size(ptr %dst, ptr %src) {
entry:
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 144, i1 false)
- ret i8* %dst
+ tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 144, i1 false)
+ ret ptr %dst
; Check that calls with compile-time constant size are handled correctly
; IR-LABEL: @memcpy_known_size
; IR: br label %load-store-loop
; IR: load-store-loop:
; IR: %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %load-store-loop ]
-; IR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
-; IR: [[Load:%[0-9]+]] = load i8, i8* [[SrcGep]]
-; IR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
-; IR: store i8 [[Load]], i8* [[DstGep]]
+; IR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, ptr %src, i64 %loop-index
+; IR: [[Load:%[0-9]+]] = load i8, ptr [[SrcGep]]
+; IR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64 %loop-index
+; IR: store i8 [[Load]], ptr [[DstGep]]
; IR: [[IndexInc]] = add i64 %loop-index, 1
; IR: [[Cond:%[0-9]+]] = icmp ult i64 %3, 144
; IR: br i1 [[Cond]], label %load-store-loop, label %memcpy-split
}
-define i8* @memset_caller(i8* %dst, i32 %c, i64 %n) #0 {
+define ptr @memset_caller(ptr %dst, i32 %c, i64 %n) #0 {
entry:
%0 = trunc i32 %c to i8
- tail call void @llvm.memset.p0i8.i64(i8* %dst, i8 %0, i64 %n, i1 false)
- ret i8* %dst
+ tail call void @llvm.memset.p0.i64(ptr %dst, i8 %0, i64 %n, i1 false)
+ ret ptr %dst
; IR-LABEL: @memset_caller
; IR: [[VAL:%[0-9]+]] = trunc i32 %c to i8
; IR: [[CMPREG:%[0-9]+]] = icmp eq i64 0, %n
; IR: br i1 [[CMPREG]], label %split, label %loadstoreloop
; IR: loadstoreloop:
-; IR: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64
-; IR-NEXT: store i8 [[VAL]], i8* [[STOREPTR]]
+; IR: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64
+; IR-NEXT: store i8 [[VAL]], ptr [[STOREPTR]]
; PTX-LABEL: .visible .func (.param .b64 func_retval0) memset_caller(
; PTX: ld.param.u32 %r[[C:[0-9]+]]
; PTX: @%p[[PRED]] bra $L__BB[[LABEL]]
}
-define i8* @volatile_memset_caller(i8* %dst, i32 %c, i64 %n) #0 {
+define ptr @volatile_memset_caller(ptr %dst, i32 %c, i64 %n) #0 {
entry:
%0 = trunc i32 %c to i8
- tail call void @llvm.memset.p0i8.i64(i8* %dst, i8 %0, i64 %n, i1 true)
- ret i8* %dst
+ tail call void @llvm.memset.p0.i64(ptr %dst, i8 %0, i64 %n, i1 true)
+ ret ptr %dst
; IR-LABEL: @volatile_memset_caller
; IR: [[VAL:%[0-9]+]] = trunc i32 %c to i8
; IR: loadstoreloop:
-; IR: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64
-; IR-NEXT: store volatile i8 [[VAL]], i8* [[STOREPTR]]
+; IR: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64
+; IR-NEXT: store volatile i8 [[VAL]], ptr [[STOREPTR]]
}
-define i8* @memmove_caller(i8* %dst, i8* %src, i64 %n) #0 {
+define ptr @memmove_caller(ptr %dst, ptr %src, i64 %n) #0 {
entry:
- tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
- ret i8* %dst
+ tail call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
+ ret ptr %dst
; IR-LABEL: @memmove_caller
-; IR: icmp ult i8* %src, %dst
+; IR: icmp ult ptr %src, %dst
; IR: [[PHIVAL:%[0-9a-zA-Z_]+]] = phi i64
; IR-NEXT: %index_ptr = sub i64 [[PHIVAL]], 1
; IR: [[FWDPHIVAL:%[0-9a-zA-Z_]+]] = phi i64
; LABEL: @lower_alloca
; PTX-LABEL: .visible .entry kernel(
%A = alloca i32
-; CHECK: addrspacecast i32* %A to i32 addrspace(5)*
-; CHECK: store i32 0, i32 addrspace(5)* {{%.+}}
+; CHECK: addrspacecast ptr %A to ptr addrspace(5)
+; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
- store i32 0, i32* %A
- call void @callee(i32* %A)
+ store i32 0, ptr %A
+ call void @callee(ptr %A)
ret void
}
-declare void @callee(i32*)
+declare void @callee(ptr)
!nvvm.annotations = !{!0}
-!0 = !{void ()* @kernel, !"kernel", i32 1}
+!0 = !{ptr @kernel, !"kernel", i32 1}
target triple = "nvptx64-nvidia-cuda"
%class.outer = type <{ %class.inner, i32, [4 x i8] }>
-%class.inner = type { i32*, i32* }
+%class.inner = type { ptr, ptr }
; Check that nvptx-lower-args preserves arg alignment
-define void @load_alignment(%class.outer* nocapture readonly byval(%class.outer) align 8 %arg) {
+define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %arg) {
entry:
-; IR: load %class.outer, %class.outer addrspace(101)*
+; IR: load %class.outer, ptr addrspace(101)
; IR-SAME: align 8
; PTX: ld.param.u64
; PTX-NOT: ld.param.u8
- %arg.idx = getelementptr %class.outer, %class.outer* %arg, i64 0, i32 0, i32 0
- %arg.idx.val = load i32*, i32** %arg.idx, align 8
- %arg.idx1 = getelementptr %class.outer, %class.outer* %arg, i64 0, i32 0, i32 1
- %arg.idx1.val = load i32*, i32** %arg.idx1, align 8
- %arg.idx2 = getelementptr %class.outer, %class.outer* %arg, i64 0, i32 1
- %arg.idx2.val = load i32, i32* %arg.idx2, align 8
- %arg.idx.val.val = load i32, i32* %arg.idx.val, align 4
+ %arg.idx.val = load ptr, ptr %arg, align 8
+ %arg.idx1 = getelementptr %class.outer, ptr %arg, i64 0, i32 0, i32 1
+ %arg.idx1.val = load ptr, ptr %arg.idx1, align 8
+ %arg.idx2 = getelementptr %class.outer, ptr %arg, i64 0, i32 1
+ %arg.idx2.val = load i32, ptr %arg.idx2, align 8
+ %arg.idx.val.val = load i32, ptr %arg.idx.val, align 4
%add.i = add nsw i32 %arg.idx.val.val, %arg.idx2.val
- store i32 %add.i, i32* %arg.idx1.val, align 4
+ store i32 %add.i, ptr %arg.idx1.val, align 4
; Let the pointer escape so we still create a local copy; this test uses that
; copy to check the load alignment.
- %tmp = call i32* @escape(i32* nonnull %arg.idx2)
+ %tmp = call ptr @escape(ptr nonnull %arg.idx2)
ret void
}
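; For reference, the local-copy idiom that nvptx-lower-args is expected to emit
; for an escaping byval argument looks roughly like this (a sketch; the value
; names are hypothetical):
;   %arg.copy  = alloca %class.outer, align 8
;   %arg.param = addrspacecast ptr %arg to ptr addrspace(101)
;   %arg.val   = load %class.outer, ptr addrspace(101) %arg.param, align 8
;   store %class.outer %arg.val, ptr %arg.copy, align 8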
; Function Attrs: convergent nounwind
-declare dso_local i32* @escape(i32*) local_unnamed_addr
+declare dso_local ptr @escape(ptr) local_unnamed_addr
; CHECK: ld.param.u32 [[value:%r[0-9]+]], [%[[param_addr1]]+12];
; CHECK: st.global.u32 [[[result_addr_g]]], [[value]];
; Function Attrs: nofree norecurse nounwind willreturn mustprogress
-define dso_local void @static_offset(i32* nocapture %arg, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 {
+define dso_local void @static_offset(ptr nocapture %arg, ptr nocapture readonly byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 {
bb:
%tmp = icmp eq i32 %arg2, 3
br i1 %tmp, label %bb3, label %bb6
bb3: ; preds = %bb
- %tmp4 = getelementptr inbounds %struct.ham, %struct.ham* %arg1, i64 0, i32 0, i64 3
- %tmp5 = load i32, i32* %tmp4, align 4
- store i32 %tmp5, i32* %arg, align 4
+ %tmp4 = getelementptr inbounds %struct.ham, ptr %arg1, i64 0, i32 0, i64 3
+ %tmp5 = load i32, ptr %tmp4, align 4
+ store i32 %tmp5, ptr %arg, align 4
br label %bb6
bb6: ; preds = %bb3, %bb
; CHECK: st.global.u32 [[[result_addr_g]]], [[value]];
; Function Attrs: nofree norecurse nounwind willreturn mustprogress
-define dso_local void @dynamic_offset(i32* nocapture %arg, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 {
+define dso_local void @dynamic_offset(ptr nocapture %arg, ptr nocapture readonly byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 {
bb:
%tmp = sext i32 %arg2 to i64
- %tmp3 = getelementptr inbounds %struct.ham, %struct.ham* %arg1, i64 0, i32 0, i64 %tmp
- %tmp4 = load i32, i32* %tmp3, align 4
- store i32 %tmp4, i32* %arg, align 4
+ %tmp3 = getelementptr inbounds %struct.ham, ptr %arg1, i64 0, i32 0, i64 %tmp
+ %tmp4 = load i32, ptr %tmp3, align 4
+ store i32 %tmp4, ptr %arg, align 4
ret void
}
; CHECK32: st.global.u8 [{{%r[0-9]+}}], [[value]];
;
; Function Attrs: nofree norecurse nounwind willreturn mustprogress
-define dso_local void @gep_bitcast(i8* nocapture %out, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
+define dso_local void @gep_bitcast(ptr nocapture %out, ptr nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
bb:
%n64 = sext i32 %n to i64
- %gep = getelementptr inbounds %struct.ham, %struct.ham* %in, i64 0, i32 0, i64 %n64
- %bc = bitcast i32* %gep to i8*
- %load = load i8, i8* %bc, align 4
- store i8 %load, i8* %out, align 4
+ %gep = getelementptr inbounds %struct.ham, ptr %in, i64 0, i32 0, i64 %n64
+ %load = load i8, ptr %gep, align 4
+ store i8 %load, ptr %out, align 4
ret void
}
; CHECK32: st.global.u8 [{{%r[0-9]+}}], [[value]];
;
; Function Attrs: nofree norecurse nounwind willreturn mustprogress
-define dso_local void @gep_bitcast_asc(i8* nocapture %out, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
+define dso_local void @gep_bitcast_asc(ptr nocapture %out, ptr nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
bb:
%n64 = sext i32 %n to i64
- %gep = getelementptr inbounds %struct.ham, %struct.ham* %in, i64 0, i32 0, i64 %n64
- %bc = bitcast i32* %gep to i8*
- %asc = addrspacecast i8* %bc to i8 addrspace(101)*
- %load = load i8, i8 addrspace(101)* %asc, align 4
- store i8 %load, i8* %out, align 4
+ %gep = getelementptr inbounds %struct.ham, ptr %in, i64 0, i32 0, i64 %n64
+ %asc = addrspacecast ptr %gep to ptr addrspace(101)
+ %load = load i8, ptr addrspace(101) %asc, align 4
+ store i8 %load, ptr %out, align 4
ret void
}
; CHECK: st.global.u32 [[[result_addr_g]]], [[value]];
; Function Attrs: convergent norecurse nounwind mustprogress
-define dso_local void @pointer_escapes(i32* nocapture %arg, %struct.ham* byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #1 {
+define dso_local void @pointer_escapes(ptr nocapture %arg, ptr byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #1 {
bb:
%tmp = sext i32 %arg2 to i64
- %tmp3 = getelementptr inbounds %struct.ham, %struct.ham* %arg1, i64 0, i32 0, i64 %tmp
- %tmp4 = load i32, i32* %tmp3, align 4
- store i32 %tmp4, i32* %arg, align 4
- %tmp5 = call i32* @escape(i32* nonnull %tmp3) #3
+ %tmp3 = getelementptr inbounds %struct.ham, ptr %arg1, i64 0, i32 0, i64 %tmp
+ %tmp4 = load i32, ptr %tmp3, align 4
+ store i32 %tmp4, ptr %arg, align 4
+ %tmp5 = call ptr @escape(ptr nonnull %tmp3) #3
ret void
}
; Function Attrs: convergent nounwind
-declare dso_local i32* @escape(i32*) local_unnamed_addr
+declare dso_local ptr @escape(ptr) local_unnamed_addr
!llvm.module.flags = !{!0, !1, !2}
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 9, i32 1]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0}
-!3 = !{void (i32*, %struct.ham*, i32)* @static_offset, !"kernel", i32 1}
-!4 = !{void (i32*, %struct.ham*, i32)* @dynamic_offset, !"kernel", i32 1}
-!5 = !{void (i32*, %struct.ham*, i32)* @pointer_escapes, !"kernel", i32 1}
-!6 = !{void (i8*, %struct.ham*, i32)* @gep_bitcast, !"kernel", i32 1}
-!7 = !{void (i8*, %struct.ham*, i32)* @gep_bitcast_asc, !"kernel", i32 1}
+!3 = !{ptr @static_offset, !"kernel", i32 1}
+!4 = !{ptr @dynamic_offset, !"kernel", i32 1}
+!5 = !{ptr @pointer_escapes, !"kernel", i32 1}
+!6 = !{ptr @gep_bitcast, !"kernel", i32 1}
+!7 = !{ptr @gep_bitcast_asc, !"kernel", i32 1}
; Verify that both %input and %output are converted to global pointers and then
; addrspacecast'ed back to the original type.
-define void @kernel(float* %input, float* %output) {
+define void @kernel(ptr %input, ptr %output) {
; CHECK-LABEL: .visible .entry kernel(
; CHECK: cvta.to.global.u64
; CHECK: cvta.to.global.u64
- %1 = load float, float* %input, align 4
+ %1 = load float, ptr %input, align 4
; CHECK: ld.global.f32
- store float %1, float* %output, align 4
+ store float %1, ptr %output, align 4
; CHECK: st.global.f32
ret void
}
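; Sketch of the rewrite the comment above describes, shown for %input (value
; names are hypothetical; only the global-pointer round trip matters):
;   %input.global = addrspacecast ptr %input to ptr addrspace(1)
;   %input.gen    = addrspacecast ptr addrspace(1) %input.global to ptr
;   %v = load float, ptr %input.gen, align 4   ; selected as ld.global.f32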
-define void @kernel2(float addrspace(1)* %input, float addrspace(1)* %output) {
+define void @kernel2(ptr addrspace(1) %input, ptr addrspace(1) %output) {
; CHECK-LABEL: .visible .entry kernel2(
; CHECK-NOT: cvta.to.global.u64
- %1 = load float, float addrspace(1)* %input, align 4
+ %1 = load float, ptr addrspace(1) %input, align 4
; CHECK: ld.global.f32
- store float %1, float addrspace(1)* %output, align 4
+ store float %1, ptr addrspace(1) %output, align 4
; CHECK: st.global.f32
ret void
}
-%struct.S = type { i32*, i32* }
+%struct.S = type { ptr, ptr }
-define void @ptr_in_byval_kernel(%struct.S* byval(%struct.S) %input, i32* %output) {
+define void @ptr_in_byval_kernel(ptr byval(%struct.S) %input, ptr %output) {
; CHECK-LABEL: .visible .entry ptr_in_byval_kernel(
; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_kernel_param_1]
; CHECK: cvta.to.global.u64 %[[optr_g:.*]], %[[optr]];
; CHECK: ld.param.u64 %[[iptr:rd.*]], [ptr_in_byval_kernel_param_0+8]
; CHECK: cvta.to.global.u64 %[[iptr_g:.*]], %[[iptr]];
- %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
- %b = load i32*, i32** %b_ptr, align 8
- %v = load i32, i32* %b, align 4
+ %b_ptr = getelementptr inbounds %struct.S, ptr %input, i64 0, i32 1
+ %b = load ptr, ptr %b_ptr, align 8
+ %v = load i32, ptr %b, align 4
; CHECK: ld.global.u32 %[[val:.*]], [%[[iptr_g]]]
- store i32 %v, i32* %output, align 4
+ store i32 %v, ptr %output, align 4
; CHECK: st.global.u32 [%[[optr_g]]], %[[val]]
ret void
}
; Regular functions lower byval arguments differently. We need to make
; sure that we're loading byval argument data using [symbol+offset].
; There's also no assumption that all pointers within are in global space.
-define void @ptr_in_byval_func(%struct.S* byval(%struct.S) %input, i32* %output) {
+define void @ptr_in_byval_func(ptr byval(%struct.S) %input, ptr %output) {
; CHECK-LABEL: .visible .func ptr_in_byval_func(
; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_func_param_1]
; CHECK: ld.param.u64 %[[iptr:rd.*]], [ptr_in_byval_func_param_0+8]
- %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
- %b = load i32*, i32** %b_ptr, align 8
- %v = load i32, i32* %b, align 4
+ %b_ptr = getelementptr inbounds %struct.S, ptr %input, i64 0, i32 1
+ %b = load ptr, ptr %b_ptr, align 8
+ %v = load i32, ptr %b, align 4
; CHECK: ld.u32 %[[val:.*]], [%[[iptr]]]
- store i32 %v, i32* %output, align 4
+ store i32 %v, ptr %output, align 4
; CHECK: st.u32 [%[[optr]]], %[[val]]
ret void
}
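; Side-by-side illustration of the two lowerings above (simplified from the
; CHECK lines; register names are made up):
;   kernel:  ld.param.u64       %rd1, [ptr_in_byval_kernel_param_0+8];
;            cvta.to.global.u64 %rd2, %rd1;
;            ld.global.u32      %r1,  [%rd2];
;   func:    ld.param.u64       %rd1, [ptr_in_byval_func_param_0+8];
;            ld.u32             %r1,  [%rd1];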
!nvvm.annotations = !{!0, !1, !2}
-!0 = !{void (float*, float*)* @kernel, !"kernel", i32 1}
-!1 = !{void (float addrspace(1)*, float addrspace(1)*)* @kernel2, !"kernel", i32 1}
-!2 = !{void (%struct.S*, i32*)* @ptr_in_byval_kernel, !"kernel", i32 1}
+!0 = !{ptr @kernel, !"kernel", i32 1}
+!1 = !{ptr @kernel2, !"kernel", i32 1}
+!2 = !{ptr @ptr_in_byval_kernel, !"kernel", i32 1}
define float @post_dominate(float %x, i1 %cond) {
; CHECK-LABEL: post_dominate(
entry:
- %0 = load float, float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
- %1 = load float, float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
+ %0 = load float, ptr addrspacecast (ptr addrspace(3) @scalar1 to ptr), align 4
+ %1 = load float, ptr addrspacecast (ptr addrspace(3) @scalar2 to ptr), align 4
; CHECK: ld.shared.f32
; CHECK: ld.shared.f32
%2 = fmul float %0, %0
; CHECK: .extern .global .align 4 .u32 decl_g;
@decl_g = external addrspace(1) global i32, align 4
; CHECK: .extern .global .attribute(.managed) .align 8 .b32 managed_decl_g;
-@managed_decl_g = external addrspace(1) global i32*, align 8
+@managed_decl_g = external addrspace(1) global ptr, align 8
!nvvm.annotations = !{!0, !1}
-!0 = !{i32 addrspace(1)* @managed_g, !"managed", i32 1}
-!1 = !{i32* addrspace(1)* @managed_decl_g, !"managed", i32 1}
+!0 = !{ptr addrspace(1) @managed_g, !"managed", i32 1}
+!1 = !{ptr addrspace(1) @managed_decl_g, !"managed", i32 1}
; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
-declare void @llvm.nvvm.mbarrier.init(i64* %a, i32 %b)
-declare void @llvm.nvvm.mbarrier.init.shared(i64 addrspace(3)* %a, i32 %b)
+declare void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b)
+declare void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %a, i32 %b)
; CHECK-LABEL: barrierinit
-define void @barrierinit(i64* %a, i32 %b) {
+define void @barrierinit(ptr %a, i32 %b) {
; CHECK_PTX32: mbarrier.init.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.init.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
- tail call void @llvm.nvvm.mbarrier.init(i64* %a, i32 %b)
+ tail call void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b)
ret void
}
; CHECK-LABEL: barrierinitshared
-define void @barrierinitshared(i64 addrspace(3)* %a, i32 %b) {
+define void @barrierinitshared(ptr addrspace(3) %a, i32 %b) {
; CHECK_PTX32: mbarrier.init.shared.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.init.shared.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
- tail call void @llvm.nvvm.mbarrier.init.shared(i64 addrspace(3)* %a, i32 %b)
+ tail call void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %a, i32 %b)
ret void
}
-declare void @llvm.nvvm.mbarrier.inval(i64* %a)
-declare void @llvm.nvvm.mbarrier.inval.shared(i64 addrspace(3)* %a)
+declare void @llvm.nvvm.mbarrier.inval(ptr %a)
+declare void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %a)
; CHECK-LABEL: barrierinval
-define void @barrierinval(i64* %a) {
+define void @barrierinval(ptr %a) {
; CHECK_PTX32: mbarrier.inval.b64 [%r{{[0-1]+}}];
; CHECK_PTX64: mbarrier.inval.b64 [%rd{{[0-1]+}}];
- tail call void @llvm.nvvm.mbarrier.inval(i64* %a)
+ tail call void @llvm.nvvm.mbarrier.inval(ptr %a)
ret void
}
; CHECK-LABEL: barrierinvalshared
-define void @barrierinvalshared(i64 addrspace(3)* %a) {
+define void @barrierinvalshared(ptr addrspace(3) %a) {
; CHECK_PTX32: mbarrier.inval.shared.b64 [%r{{[0-1]+}}];
; CHECK_PTX64: mbarrier.inval.shared.b64 [%rd{{[0-1]+}}];
- tail call void @llvm.nvvm.mbarrier.inval.shared(i64 addrspace(3)* %a)
+ tail call void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %a)
ret void
}
-declare i64 @llvm.nvvm.mbarrier.arrive(i64* %a)
-declare i64 @llvm.nvvm.mbarrier.arrive.shared(i64 addrspace(3)* %a)
+declare i64 @llvm.nvvm.mbarrier.arrive(ptr %a)
+declare i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %a)
; CHECK-LABEL: barrierarrive
-define void @barrierarrive(i64* %a) {
+define void @barrierarrive(ptr %a) {
; CHECK_PTX32: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
- %ret = tail call i64 @llvm.nvvm.mbarrier.arrive(i64* %a)
+ %ret = tail call i64 @llvm.nvvm.mbarrier.arrive(ptr %a)
ret void
}
; CHECK-LABEL: barrierarriveshared
-define void @barrierarriveshared(i64 addrspace(3)* %a) {
+define void @barrierarriveshared(ptr addrspace(3) %a) {
; CHECK_PTX32: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
- %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.shared(i64 addrspace(3)* %a)
+ %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %a)
ret void
}
-declare i64 @llvm.nvvm.mbarrier.arrive.noComplete(i64* %a, i32 %b)
-declare i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
+declare i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %a, i32 %b)
+declare i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %a, i32 %b)
; CHECK-LABEL: barrierarrivenoComplete
-define void @barrierarrivenoComplete(i64* %a, i32 %b) {
+define void @barrierarrivenoComplete(ptr %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
- %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete(i64* %a, i32 %b)
+ %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %a, i32 %b)
ret void
}
; CHECK-LABEL: barrierarrivenoCompleteshared
-define void @barrierarrivenoCompleteshared(i64 addrspace(3)* %a, i32 %b) {
+define void @barrierarrivenoCompleteshared(ptr addrspace(3) %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
- %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
+ %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %a, i32 %b)
ret void
}
-declare i64 @llvm.nvvm.mbarrier.arrive.drop(i64* %a)
-declare i64 @llvm.nvvm.mbarrier.arrive.drop.shared(i64 addrspace(3)* %a)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop(ptr %a)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.shared(ptr addrspace(3) %a)
; CHECK-LABEL: barrierarrivedrop
-define void @barrierarrivedrop(i64* %a) {
+define void @barrierarrivedrop(ptr %a) {
; CHECK_PTX32: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
- %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop(i64* %a)
+ %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop(ptr %a)
ret void
}
; CHECK-LABEL: barrierarrivedropshared
-define void @barrierarrivedropshared(i64 addrspace(3)* %a) {
+define void @barrierarrivedropshared(ptr addrspace(3) %a) {
; CHECK_PTX32: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK_PTX64: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
- %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.shared(i64 addrspace(3)* %a)
+ %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.shared(ptr addrspace(3) %a)
ret void
}
-declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(i64* %a, i32 %b)
-declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(ptr %a, i32 %b)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(ptr addrspace(3) %a, i32 %b)
; CHECK-LABEL: barrierarrivedropnoComplete
-define void @barrierarrivedropnoComplete(i64* %a, i32 %b) {
+define void @barrierarrivedropnoComplete(ptr %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
- %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(i64* %a, i32 %b)
+ %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(ptr %a, i32 %b)
ret void
}
; CHECK-LABEL: barrierarrivedropnoCompleteshared
-define void @barrierarrivedropnoCompleteshared(i64 addrspace(3)* %a, i32 %b) {
+define void @barrierarrivedropnoCompleteshared(ptr addrspace(3) %a, i32 %b) {
; CHECK_PTX32: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
; CHECK_PTX64: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
- %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
+ %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(ptr addrspace(3) %a, i32 %b)
ret void
}
-declare i1 @llvm.nvvm.mbarrier.test.wait(i64* %a, i64 %b)
-declare i1 @llvm.nvvm.mbarrier.test.wait.shared(i64 addrspace(3)* %a, i64 %b)
+declare i1 @llvm.nvvm.mbarrier.test.wait(ptr %a, i64 %b)
+declare i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %a, i64 %b)
; CHECK-LABEL: barriertestwait
-define void @barriertestwait(i64* %a, i64 %b) {
+define void @barriertestwait(ptr %a, i64 %b) {
; CHECK_PTX32: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
- %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait(i64* %a, i64 %b)
+ %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait(ptr %a, i64 %b)
ret void
}
; CHECK-LABEL: barriertestwaitshared
-define void @barriertestwaitshared(i64 addrspace(3)* %a, i64 %b) {
+define void @barriertestwaitshared(ptr addrspace(3) %a, i64 %b) {
; CHECK_PTX32: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
- %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait.shared(i64 addrspace(3)* %a, i64 %b)
+ %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %a, i64 %b)
ret void
}
declare i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)
; CHECK-LABEL: barrierpendingcount
-define i32 @barrierpendingcount(i64* %a, i64 %b) {
+define i32 @barrierpendingcount(ptr %a, i64 %b) {
; CHECK_PTX32: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK_PTX64: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
%ret = tail call i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)
; RUN: llc < %s -march=nvptx -O0 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx -O0 | %ptxas-verify %}
-define i16 @test1(i16* %sur1) {
+define i16 @test1(ptr %sur1) {
; CHECK-NOT: mov.u16 %rs{{[0-9]+}}, 32767
%_tmp21.i = icmp sle i16 0, 0
%_tmp22.i = select i1 %_tmp21.i, i16 0, i16 32767
- store i16 %_tmp22.i, i16* %sur1
+ store i16 %_tmp22.i, ptr %sur1
ret i16 0
}
target triple = "nvptx64-nvidia-cuda"
; CHECK-LABEL: t1
-define <4 x float> @t1(i8* %p1) {
+define <4 x float> @t1(ptr %p1) {
; CHECK-NOT: ld.v4
; CHECK-NOT: ld.v2
; CHECK-NOT: ld.f32
; CHECK: ld.u8
- %cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>, <4 x float>* %cast, align 1
+ %r = load <4 x float>, ptr %p1, align 1
ret <4 x float> %r
}
; CHECK-LABEL: t2
-define <4 x float> @t2(i8* %p1) {
+define <4 x float> @t2(ptr %p1) {
; CHECK-NOT: ld.v4
; CHECK-NOT: ld.v2
; CHECK: ld.f32
- %cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>, <4 x float>* %cast, align 4
+ %r = load <4 x float>, ptr %p1, align 4
ret <4 x float> %r
}
; CHECK-LABEL: t3
-define <4 x float> @t3(i8* %p1) {
+define <4 x float> @t3(ptr %p1) {
; CHECK-NOT: ld.v4
; CHECK: ld.v2
- %cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>, <4 x float>* %cast, align 8
+ %r = load <4 x float>, ptr %p1, align 8
ret <4 x float> %r
}
; CHECK-LABEL: t4
-define <4 x float> @t4(i8* %p1) {
+define <4 x float> @t4(ptr %p1) {
; CHECK: ld.v4
- %cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>, <4 x float>* %cast, align 16
+ %r = load <4 x float>, ptr %p1, align 16
ret <4 x float> %r
}
; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
; CHECK: ret
-define void @test_v1halfp0a1(<1 x half> * noalias readonly %from, <1 x half> * %to) {
- %1 = load <1 x half>, <1 x half> * %from , align 1
- store <1 x half> %1, <1 x half> * %to , align 1
+define void @test_v1halfp0a1(ptr noalias readonly %from, ptr %to) {
+ %1 = load <1 x half>, ptr %from , align 1
+ store <1 x half> %1, ptr %to , align 1
ret void
}
; CHECK-DAG: ld.u8 [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3]
; CHECK-DAG: st.u8 [%[[TO]]+3],
; CHECK: ret
-define void @test_v2halfp0a1(<2 x half> * noalias readonly %from, <2 x half> * %to) {
- %1 = load <2 x half>, <2 x half> * %from , align 1
- store <2 x half> %1, <2 x half> * %to , align 1
+define void @test_v2halfp0a1(ptr noalias readonly %from, ptr %to) {
+ %1 = load <2 x half>, ptr %from , align 1
+ store <2 x half> %1, ptr %to , align 1
ret void
}
; CHECK-DAG: ld.u8 [[B7:%r[sd]?[0-9]+]], [%[[FROM]]+7]
; CHECK-DAG: st.u8 [%[[TO]]+7], [[B7]]
; CHECK: ret
-define void @test_v4halfp0a1(<4 x half> * noalias readonly %from, <4 x half> * %to) {
- %1 = load <4 x half>, <4 x half> * %from , align 1
- store <4 x half> %1, <4 x half> * %to , align 1
+define void @test_v4halfp0a1(ptr noalias readonly %from, ptr %to) {
+ %1 = load <4 x half>, ptr %from , align 1
+ store <4 x half> %1, ptr %to , align 1
ret void
}
; CHECK-LABEL: s1
-define void @s1(<4 x float>* %p1, <4 x float> %v) {
+define void @s1(ptr %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK-NOT: st.v2
; CHECK-NOT: st.f32
; CHECK: st.u8
- store <4 x float> %v, <4 x float>* %p1, align 1
+ store <4 x float> %v, ptr %p1, align 1
ret void
}
; CHECK-LABEL: s2
-define void @s2(<4 x float>* %p1, <4 x float> %v) {
+define void @s2(ptr %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK-NOT: st.v2
; CHECK: st.f32
- store <4 x float> %v, <4 x float>* %p1, align 4
+ store <4 x float> %v, ptr %p1, align 4
ret void
}
; CHECK-LABEL: s3
-define void @s3(<4 x float>* %p1, <4 x float> %v) {
+define void @s3(ptr %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
- store <4 x float> %v, <4 x float>* %p1, align 8
+ store <4 x float> %v, ptr %p1, align 8
ret void
}
; CHECK-LABEL: s4
-define void @s4(<4 x float>* %p1, <4 x float> %v) {
+define void @s4(ptr %p1, <4 x float> %v) {
; CHECK: st.v4
- store <4 x float> %v, <4 x float>* %p1, align 16
+ store <4 x float> %v, ptr %p1, align 16
ret void
}
@"$str" = private addrspace(1) constant [4 x i8] c"str\00"
-declare void @str2(i8* %str)
+declare void @str2(ptr %str)
define void @str1() {
entry:
;; CHECK: mov.u64 %rd{{[0-9]+}}, $str;
- tail call void @str2(i8* getelementptr ([4 x i8], [4 x i8]* addrspacecast ([4 x i8] addrspace(1)* @"$str" to [4 x i8]*), i64 0, i64 0))
+ tail call void @str2(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr))
ret void
}
; CHECK-NOT: call void @llvm.nvvm.barrier0
; Function Attrs: nounwind
-define void @foo(float* %output) #1 {
+define void @foo(ptr %output) #1 {
entry:
- %output.addr = alloca float*, align 8
- store float* %output, float** %output.addr, align 8
- %0 = load float*, float** %output.addr, align 8
- %arrayidx = getelementptr inbounds float, float* %0, i64 0
- %1 = load float, float* %arrayidx, align 4
+ %output.addr = alloca ptr, align 8
+ store ptr %output, ptr %output.addr, align 8
+ %0 = load ptr, ptr %output.addr, align 8
+ %1 = load float, ptr %0, align 4
%conv = fpext float %1 to double
%cmp = fcmp olt double %conv, 1.000000e+01
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- %2 = load float*, float** %output.addr, align 8
- %3 = load float, float* %2, align 4
+ %2 = load ptr, ptr %output.addr, align 8
+ %3 = load float, ptr %2, align 4
%conv1 = fpext float %3 to double
%add = fadd double %conv1, 1.000000e+00
%conv2 = fptrunc double %add to float
- store float %conv2, float* %2, align 4
+ store float %conv2, ptr %2, align 4
br label %if.end
if.else: ; preds = %entry
- %4 = load float*, float** %output.addr, align 8
- %5 = load float, float* %4, align 4
+ %4 = load ptr, ptr %output.addr, align 8
+ %5 = load float, ptr %4, align 4
%conv3 = fpext float %5 to double
%add4 = fadd double %conv3, 2.000000e+00
%conv5 = fptrunc double %add4 to float
- store float %conv5, float* %4, align 4
+ store float %conv5, ptr %4, align 4
br label %if.end
if.end: ; preds = %if.else, %if.then
call void @llvm.nvvm.barrier0()
- %6 = load float*, float** %output.addr, align 8
- %arrayidx6 = getelementptr inbounds float, float* %6, i64 0
- %7 = load float, float* %arrayidx6, align 4
+ %6 = load ptr, ptr %output.addr, align 8
+ %7 = load float, ptr %6, align 4
%conv7 = fpext float %7 to double
%cmp8 = fcmp olt double %conv7, 1.000000e+01
br i1 %cmp8, label %if.then9, label %if.else13
if.then9: ; preds = %if.end
- %8 = load float*, float** %output.addr, align 8
- %9 = load float, float* %8, align 4
+ %8 = load ptr, ptr %output.addr, align 8
+ %9 = load float, ptr %8, align 4
%conv10 = fpext float %9 to double
%add11 = fadd double %conv10, 3.000000e+00
%conv12 = fptrunc double %add11 to float
- store float %conv12, float* %8, align 4
+ store float %conv12, ptr %8, align 4
br label %if.end17
if.else13: ; preds = %if.end
- %10 = load float*, float** %output.addr, align 8
- %11 = load float, float* %10, align 4
+ %10 = load ptr, ptr %output.addr, align 8
+ %11 = load float, ptr %10, align 4
%conv14 = fpext float %11 to double
%add15 = fadd double %conv14, 4.000000e+00
%conv16 = fptrunc double %add15 to float
- store float %conv16, float* %10, align 4
+ store float %conv16, ptr %10, align 4
br label %if.end17
if.end17: ; preds = %if.else13, %if.then9
; Function Attrs: noduplicate nounwind
declare void @llvm.nvvm.barrier0() #2
-!0 = !{void (float*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}
!1 = !{null, !"align", i32 8}
target triple = "nvptx64-nvidia-cuda"
@Funcs = local_unnamed_addr addrspace(1) externally_initialized
- global [1 x void (i8*)*] [void (i8*)* @func], align 8
+ global [1 x ptr] [ptr @func], align 8
-declare void @func(i8*)
+declare void @func(ptr)
; CHECK: Funcs[1] = {func}
; #pragma nounroll
; for (int i = 0; i < 2; ++i)
; output[i] = input[i];
-define void @nounroll(float* %input, float* %output) {
+define void @nounroll(ptr %input, ptr %output) {
; CHECK-LABEL: .visible .func nounroll(
entry:
br label %for.body
; CHECK: .pragma "nounroll"
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%idxprom = sext i32 %i.06 to i64
- %arrayidx = getelementptr inbounds float, float* %input, i64 %idxprom
- %0 = load float, float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, ptr %input, i64 %idxprom
+ %0 = load float, ptr %arrayidx, align 4
; CHECK: ld.f32
- %arrayidx2 = getelementptr inbounds float, float* %output, i64 %idxprom
- store float %0, float* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %output, i64 %idxprom
+ store float %0, ptr %arrayidx2, align 4
; CHECK: st.f32
%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, 2
; #pragma unroll 1
; for (int i = 0; i < 2; ++i)
; output[i] = input[i];
-define void @unroll1(float* %input, float* %output) {
+define void @unroll1(ptr %input, ptr %output) {
; CHECK-LABEL: .visible .func unroll1(
entry:
br label %for.body
; CHECK: .pragma "nounroll"
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%idxprom = sext i32 %i.06 to i64
- %arrayidx = getelementptr inbounds float, float* %input, i64 %idxprom
- %0 = load float, float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, ptr %input, i64 %idxprom
+ %0 = load float, ptr %arrayidx, align 4
; CHECK: ld.f32
- %arrayidx2 = getelementptr inbounds float, float* %output, i64 %idxprom
- store float %0, float* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %output, i64 %idxprom
+ store float %0, ptr %arrayidx2, align 4
; CHECK: st.f32
%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, 2
target triple = "nvptx-unknown-nvcl"
-define void @foo(i64 %img, i64 %sampler, <5 x float>* align 32 %v1, i32* %v2) {
+define void @foo(i64 %img, i64 %sampler, ptr align 32 %v1, ptr %v2) {
; The parameter alignment is determined by the align attribute (default 1).
; CHECK-LABEL: .entry foo(
; CHECK: .param .u32 .ptr .align 32 foo_param_2
}
!nvvm.annotations = !{!1, !2, !3}
-!1 = !{void (i64, i64, <5 x float>*, i32*)* @foo, !"kernel", i32 1}
-!2 = !{void (i64, i64, <5 x float>*, i32*)* @foo, !"rdoimage", i32 0}
-!3 = !{void (i64, i64, <5 x float>*, i32*)* @foo, !"sampler", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @foo, !"rdoimage", i32 0}
+!3 = !{ptr @foo, !"sampler", i32 1}
@"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"
-declare i32 @__nvvm_reflect(i8*)
+declare i32 @__nvvm_reflect(ptr)
; COMMON-LABEL: @foo
define i32 @foo(float %a, float %b) {
; COMMON-NOT: call i32 @__nvvm_reflect
- %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([12 x i8], [12 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+ %reflect = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr))
; SM20: ret i32 200
; SM35: ret i32 350
ret i32 %reflect
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-reflect | FileCheck %s
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-reflect | FileCheck %s
-declare i32 @__nvvm_reflect(i8*)
+declare i32 @__nvvm_reflect(ptr)
@str = private unnamed_addr addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
define i32 @foo() {
- %call = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
+ %call = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @str to ptr))
; CHECK: ret i32 42
ret i32 %call
}
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
-declare i32 @__nvvm_reflect(i8*)
-declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
+declare i32 @__nvvm_reflect(ptr)
+declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4))
; CHECK-LABEL: @foo
define float @foo(float %a, float %b) {
; CHECK-NOT: call i32 @__nvvm_reflect
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
- %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
+ %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
+ %reflect = tail call i32 @__nvvm_reflect(ptr %ptr)
%cmp = icmp ugt i32 %reflect, 0
br i1 %cmp, label %use_mul, label %use_add
ret float %ret
}
-declare i32 @llvm.nvvm.reflect.p0i8(i8*)
+declare i32 @llvm.nvvm.reflect.p0(ptr)
; CHECK-LABEL: define i32 @intrinsic
define i32 @intrinsic() {
; CHECK-NOT: call i32 @llvm.nvvm.reflect
; USE_FTZ_0: ret i32 0
; USE_FTZ_1: ret i32 1
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
- %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
+ %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
+ %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr)
ret i32 %reflect
}
; CHECK-LABEL: @bar
define float @bar(float %a, float %b) {
; CHECK-NOT: call i32 @__nvvm_reflect
- %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+ %reflect = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr))
%cmp = icmp ne i32 %reflect, 0
br i1 %cmp, label %use_mul, label %use_add
; CHECK: .extern .func func
; CHECK: .u8 p;
-%t1 = type <{ i16, i8*, i8, void ()*, i8*, i32 }>
+%t1 = type <{ i16, ptr, i8, ptr, ptr, i32 }>
@s1 = addrspace(1) global %t1 <{
; ERROR: initialized packed aggregate with pointers 's1' requires at least PTX ISA version 7.1
; CHECK32: .global .align 1 .u8 s1[19] = {
; CHECK64: .global .align 1 .u8 s1[31] = {
i16 12,
; CHECK-SAME: 12, 0,
- i8* addrspacecast (i8 addrspace(1)* @p to i8*),
+ ptr addrspacecast (ptr addrspace(1) @p to ptr),
; CHECK-SAME: 0xFF(generic(p)), 0xFF00(generic(p)), 0xFF0000(generic(p)), 0xFF000000(generic(p)),
; CHECK64-SAME: 0xFF00000000(generic(p)), 0xFF0000000000(generic(p)), 0xFF000000000000(generic(p)), 0xFF00000000000000(generic(p)),
i8 34,
; CHECK-SAME: 34
- void ()* @func,
+ ptr @func,
; CHECK-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
- i8* addrspacecast (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* @p, i32 3) to i8*),
+ ptr addrspacecast (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @p, i32 3) to ptr),
; CHECK-SAME: 0xFF(generic(p)+3), 0xFF00(generic(p)+3), 0xFF0000(generic(p)+3), 0xFF000000(generic(p)+3),
; CHECK64-SAME: 0xFF00000000(generic(p)+3), 0xFF0000000000(generic(p)+3), 0xFF000000000000(generic(p)+3), 0xFF00000000000000(generic(p)+3),
i32 56 }>, align 1
;; Test a case where an unaligned pointer is in a nested struct.
-%t2i = type <{ void ()* }>
+%t2i = type <{ ptr }>
%t2o = type { i8, %t2i, i32 }
@s2 = addrspace(1) global %t2o {
; CHECK32: .global .align 8 .u8 s2[12] = {
; CHECK64: .global .align 8 .u8 s2[16] = {
i8 12,
; CHECK-SAME: 12,
- %t2i <{ void()* @func }>,
+ %t2i <{ ptr @func }>,
; CHECK-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
i32 34}
;; is printed in bytes and uses the mask() operator for pointers even though
;; the pointers are aligned.
-%t3 = type <{ void ()*, i8 }>
+%t3 = type <{ ptr, i8 }>
@s3 = addrspace(1) global %t3 <{
; CHECK32: .global .align 1 .u8 s3[5] = {
; CHECK64: .global .align 1 .u8 s3[9] = {
- void ()* @func,
+ ptr @func,
; CHECK-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
i8 56 }>, align 1
;; Test that a packed struct with aligned pointers is printed in words.
-%t4 = type <{ void ()*, i64 }>
+%t4 = type <{ ptr, i64 }>
@s4 = addrspace(1) global %t4 <{
; CHECK32: .global .align 1 .u32 s4[3] = {
; CHECK64: .global .align 1 .u64 s4[2] = {
- void()* @func,
+ ptr @func,
; CHECK-SAME: func,
i64 15}>, align 1
; CHECK32-SAME: 15, 0};
;; Test that a packed struct with unaligned pointers inside an array is handled.
-%t5 = type <{ void ()*, i16 }>
-@a5 = addrspace(1) global [2 x %t5] [%t5 <{ void()* @func, i16 5 }>, %t5 <{ void()* @func, i16 9 }> ]
+%t5 = type <{ ptr, i16 }>
+@a5 = addrspace(1) global [2 x %t5] [%t5 <{ ptr @func, i16 5 }>, %t5 <{ ptr @func, i16 9 }> ]
; CHECK32: .global .align 8 .u8 a5[12] = {
; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), 5, 0,
; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), 9, 0};
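;; A hypothetical sketch, not part of the test, of the distinction drawn above: the
;; pointer in %t6 sits at offset 1, so its initializer would have to be emitted
;; byte-wise with mask() expressions, while padding the pointer to its natural
;; alignment is what allows whole-word emission as in s4.
%t6 = type <{ i8, ptr }>
@a6 = addrspace(1) global %t6 <{ i8 7, ptr @func }>, align 1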
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
-;;; Need 4-byte alignment on float* passed byval
-define ptx_device void @t1(float* byval(float) %x) {
+;;; Need 4-byte alignment on ptr passed byval
+define ptx_device void @t1(ptr byval(float) %x) {
; CHECK: .func t1
; CHECK: .param .align 4 .b8 t1_param_0[4]
ret void
}
-;;; Need 8-byte alignment on double* passed byval
-define ptx_device void @t2(double* byval(double) %x) {
+;;; Need 8-byte alignment on ptr passed byval
+define ptx_device void @t2(ptr byval(double) %x) {
; CHECK: .func t2
; CHECK: .param .align 8 .b8 t2_param_0[8]
ret void
;;; Need 4-byte alignment on float2* passed byval
%struct.float2 = type { float, float }
-define ptx_device void @t3(%struct.float2* byval(%struct.float2) %x) {
+define ptx_device void @t3(ptr byval(%struct.float2) %x) {
; CHECK: .func t3
; CHECK: .param .align 4 .b8 t3_param_0[8]
ret void
;;; Need at least 4-byte alignment in order to avoid miscompilation by
;;; ptxas for sm_50+
-define ptx_device void @t4(i8* byval(i8) %x) {
+define ptx_device void @t4(ptr byval(i8) %x) {
; CHECK: .func t4
; CHECK: .param .align 4 .b8 t4_param_0[1]
ret void
}
;;; Make sure we adjust alignment at the call site as well.
-define ptx_device void @t5(i8* align 2 byval(i8) %x) {
+define ptx_device void @t5(ptr align 2 byval(i8) %x) {
; CHECK: .func t5
; CHECK: .param .align 4 .b8 t5_param_0[1]
; CHECK: {
; CHECK: .param .align 4 .b8 param0[1];
; CHECK: call.uni
- call void @t4(i8* byval(i8) %x)
+ call void @t4(ptr byval(i8) %x)
ret void
}
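;;; A hedged sketch (hypothetical function, not from the test) of the same rounding
;;; rule: even a small byval argument with a lower declared alignment would be
;;; expected to receive a .param slot aligned to at least 4 bytes.
define ptx_device void @t6_sketch(ptr align 2 byval(i16) %x) {
  ret void
}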
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
-define ptx_kernel void @t1(i1* %a) {
+define ptx_kernel void @t1(ptr %a) {
; PTX32: mov.u16 %rs{{[0-9]+}}, 0;
; PTX32-NEXT: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}};
; PTX64: mov.u16 %rs{{[0-9]+}}, 0;
; PTX64-NEXT: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}};
- store i1 false, i1* %a
+ store i1 false, ptr %a
ret void
}
-define ptx_kernel void @t2(i1* %a, i8* %b) {
+define ptx_kernel void @t2(ptr %a, ptr %b) {
; PTX32: ld.global.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
- %t1 = load i1, i1* %a
+ %t1 = load i1, ptr %a
%t2 = select i1 %t1, i8 1, i8 2
- store i8 %t2, i8* %b
+ store i8 %t2, ptr %b
ret void
}
define float @foo() {
; CHECK: ld.const.f32
- %val = load float, float addrspace(4)* @one_f
+ %val = load float, ptr addrspace(4) @one_f
ret float %val
}
; Function Attrs: nounwind
; CHECK: .func kernelgen_memcpy
-define ptx_device void @kernelgen_memcpy(i8* nocapture %dst) #0 {
+define ptx_device void @kernelgen_memcpy(ptr nocapture %dst) #0 {
entry:
br i1 undef, label %for.end, label %vector.body
vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
- %scevgep9 = getelementptr i8, i8* %dst, i64 %index
- %scevgep910 = bitcast i8* %scevgep9 to <4 x i8>*
- store <4 x i8> undef, <4 x i8>* %scevgep910, align 1
+ %scevgep9 = getelementptr i8, ptr %dst, i64 %index
+ store <4 x i8> undef, ptr %scevgep9, align 1
%index.next = add i64 %index, 4
%0 = icmp eq i64 undef, %index.next
br i1 %0, label %middle.block, label %vector.body
br i1 undef, label %for.end, label %for.body.preheader1
for.body.preheader1: ; preds = %middle.block
- %scevgep2 = getelementptr i8, i8* %dst, i64 0
br label %for.body
for.body: ; preds = %for.body, %for.body.preheader1
- %lsr.iv3 = phi i8* [ %scevgep2, %for.body.preheader1 ], [ %scevgep4, %for.body ]
- store i8 undef, i8* %lsr.iv3, align 1
- %scevgep4 = getelementptr i8, i8* %lsr.iv3, i64 1
+ %lsr.iv3 = phi ptr [ %dst, %for.body.preheader1 ], [ %scevgep4, %for.body ]
+ store i8 undef, ptr %lsr.iv3, align 1
+ %scevgep4 = getelementptr i8, ptr %lsr.iv3, i64 1
br label %for.body
for.end: ; preds = %middle.block, %entry
; CHECK-LABEL: test_gv_float()
define float @test_gv_float() {
; CHECK: ld.global.nc.f32
- %v = load float, float* @gv_float
+ %v = load float, ptr @gv_float
ret float %v
}
; CHECK-LABEL: test_gv_float2()
define <2 x float> @test_gv_float2() {
; CHECK: ld.global.nc.v2.f32
- %v = load <2 x float>, <2 x float>* @gv_float2
+ %v = load <2 x float>, ptr @gv_float2
ret <2 x float> %v
}
; CHECK-LABEL: test_gv_float4()
define <4 x float> @test_gv_float4() {
; CHECK: ld.global.nc.v4.f32
- %v = load <4 x float>, <4 x float>* @gv_float4
+ %v = load <4 x float>, ptr @gv_float4
ret <4 x float> %v
}
; Function Attrs: nounwind
; CHECK: .entry foo
-define void @foo(float* nocapture %a) #0 {
- %val = load float, float* %a
+define void @foo(ptr nocapture %a) #0 {
+ %val = load float, ptr %a
%tan = tail call fastcc float @__nv_fast_tanf(float %val)
- store float %tan, float* %a
+ store float %tan, ptr %a
ret void
}
!nvvm.annotations = !{!0}
-!0 = !{void (float*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"
-define void @PR24303(float* %f) {
+define void @PR24303(ptr %f) {
; CHECK-LABEL: .visible .entry PR24303(
; Do not use mov.f or mov.u to convert between float and int.
; CHECK-NOT: mov.{{f|u}}{{32|64}} %f{{[0-9]+}}, %r{{[0-9]+}}
; CHECK-NOT: mov.{{f|u}}{{32|64}} %r{{[0-9]+}}, %f{{[0-9]+}}
entry:
- %arrayidx1 = getelementptr inbounds float, float* %f, i64 1
- %0 = load float, float* %f, align 4
- %1 = load float, float* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds float, float* %f, i64 2
- %arrayidx3 = getelementptr inbounds float, float* %f, i64 3
- %2 = load float, float* %arrayidx2, align 4
- %3 = load float, float* %arrayidx3, align 4
+ %arrayidx1 = getelementptr inbounds float, ptr %f, i64 1
+ %0 = load float, ptr %f, align 4
+ %1 = load float, ptr %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %f, i64 2
+ %arrayidx3 = getelementptr inbounds float, ptr %f, i64 3
+ %2 = load float, ptr %arrayidx2, align 4
+ %3 = load float, ptr %arrayidx3, align 4
%mul.i = fmul float %0, %2
%mul4.i = fmul float %1, %3
%mul5.i = fmul float %0, %3
_ZN12cuda_builtinmlIfEENS_7complexIT_EERKS3_S5_.exit: ; preds = %if.then.93.i, %lor.lhs.false.67.i, %land.lhs.true.i, %entry
%84 = phi i32 [ %4, %land.lhs.true.i ], [ %4, %entry ], [ %82, %if.then.93.i ], [ %4, %lor.lhs.false.67.i ]
%85 = phi i32 [ %5, %land.lhs.true.i ], [ %5, %entry ], [ %83, %if.then.93.i ], [ %5, %lor.lhs.false.67.i ]
- %arrayidx5 = getelementptr inbounds float, float* %f, i64 5
- %86 = bitcast float* %arrayidx5 to i32*
- store i32 %84, i32* %86, align 4
- %arrayidx7 = getelementptr inbounds float, float* %f, i64 6
- %87 = bitcast float* %arrayidx7 to i32*
- store i32 %85, i32* %87, align 4
+ %arrayidx5 = getelementptr inbounds float, ptr %f, i64 5
+ store i32 %84, ptr %arrayidx5, align 4
+ %arrayidx7 = getelementptr inbounds float, ptr %f, i64 6
+ store i32 %85, ptr %arrayidx7, align 4
ret void
}
!nvvm.annotations = !{!0}
-!0 = !{void (float*)* @PR24303, !"kernel", i32 1}
+!0 = !{ptr @PR24303, !"kernel", i32 1}
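; A minimal hypothetical reproducer of the constraint above, not part of the test:
; reinterpreting float bits as an integer should not be lowered through
; mov.f32/mov.u32; a plain bitwise register move is the expected form.
define i32 @float_bits_sketch(float %x) {
  %bits = bitcast float %x to i32
  ret i32 %bits
}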
; CHECK-DAG: .reg .f64 %fd<
; Verify that we use correct register types.
- store i8 1, i8* %s8, align 1
+ store i8 1, ptr %s8, align 1
; CHECK: mov.u16 [[R1:%rs[0-9]]], 1;
; CHECK-NEXT: st.u8 {{.*}}, [[R1]]
- store i8 2, i8* %u8, align 1
+ store i8 2, ptr %u8, align 1
; CHECK: mov.u16 [[R2:%rs[0-9]]], 2;
; CHECK-NEXT: st.u8 {{.*}}, [[R2]]
- store i16 3, i16* %s16, align 2
+ store i16 3, ptr %s16, align 2
; CHECK: mov.u16 [[R3:%rs[0-9]]], 3;
; CHECK-NEXT: st.u16 {{.*}}, [[R3]]
- store i16 4, i16* %u16, align 2
+ store i16 4, ptr %u16, align 2
; CHECK: mov.u16 [[R4:%rs[0-9]]], 4;
; CHECK-NEXT: st.u16 {{.*}}, [[R4]]
- store i32 5, i32* %s32, align 4
+ store i32 5, ptr %s32, align 4
; CHECK: mov.u32 [[R5:%r[0-9]]], 5;
; CHECK-NEXT: st.u32 {{.*}}, [[R5]]
- store i32 6, i32* %u32, align 4
+ store i32 6, ptr %u32, align 4
; CHECK: mov.u32 [[R6:%r[0-9]]], 6;
; CHECK-NEXT: st.u32 {{.*}}, [[R6]]
- store i64 7, i64* %s64, align 8
+ store i64 7, ptr %s64, align 8
; CHECK: mov.u64 [[R7:%rd[0-9]]], 7;
; CHECK-NEXT: st.u64 {{.*}}, [[R7]]
- store i64 8, i64* %u64, align 8
+ store i64 8, ptr %u64, align 8
; CHECK: mov.u64 [[R8:%rd[0-9]]], 8;
; CHECK-NEXT: st.u64 {{.*}}, [[R8]]
; FP constants are stored via integer registers, but that's an
; implementation detail that's irrelevant here.
- store float 9.000000e+00, float* %f32, align 4
- store double 1.000000e+01, double* %f64, align 8
+ store float 9.000000e+00, ptr %f32, align 4
+ store double 1.000000e+01, ptr %f64, align 8
; Instead, we force a load into a register and then verify register type.
- %f32v = load volatile float, float* %f32, align 4
+ %f32v = load volatile float, ptr %f32, align 4
; CHECK: ld.volatile.f32 %f{{[0-9]+}}
- %f64v = load volatile double, double* %f64, align 8
+ %f64v = load volatile double, ptr %f64, align 8
; CHECK: ld.volatile.f64 %fd{{[0-9]+}}
ret void
; CHECK: ret;
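; A self-contained sketch of the same technique, using a hypothetical helper that
; is not part of the test: a volatile load cannot be folded away, so the value must
; be materialized in a register whose class (%f vs. %fd) FileCheck can then match.
define float @force_f32_reg_sketch(ptr %p) {
  %v = load volatile float, ptr %p
  ret float %v
}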
; Ensure source scheduling is working
-define void @foo(i32* %a) {
+define void @foo(ptr %a) {
; CHECK: .func foo
; CHECK: ld.u32
; CHECK-NEXT: ld.u32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
- %ptr0 = getelementptr i32, i32* %a, i32 0
- %val0 = load i32, i32* %ptr0
- %ptr1 = getelementptr i32, i32* %a, i32 1
- %val1 = load i32, i32* %ptr1
- %ptr2 = getelementptr i32, i32* %a, i32 2
- %val2 = load i32, i32* %ptr2
- %ptr3 = getelementptr i32, i32* %a, i32 3
- %val3 = load i32, i32* %ptr3
+ %val0 = load i32, ptr %a
+ %ptr1 = getelementptr i32, ptr %a, i32 1
+ %val1 = load i32, ptr %ptr1
+ %ptr2 = getelementptr i32, ptr %a, i32 2
+ %val2 = load i32, ptr %ptr2
+ %ptr3 = getelementptr i32, ptr %a, i32 3
+ %val3 = load i32, ptr %ptr3
%t0 = add i32 %val0, %val1
%t1 = add i32 %t0, %val2
%t2 = add i32 %t1, %val3
- store i32 %t2, i32* %a
+ store i32 %t2, ptr %a
ret void
}
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
-define void @foo(<2 x i32>* %a) {
+define void @foo(ptr %a) {
; CHECK: .func foo
; CHECK: ld.v2.u32
; CHECK-NEXT: ld.v2.u32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
- %ptr0 = getelementptr <2 x i32>, <2 x i32>* %a, i32 0
- %val0 = load <2 x i32>, <2 x i32>* %ptr0
- %ptr1 = getelementptr <2 x i32>, <2 x i32>* %a, i32 1
- %val1 = load <2 x i32>, <2 x i32>* %ptr1
- %ptr2 = getelementptr <2 x i32>, <2 x i32>* %a, i32 2
- %val2 = load <2 x i32>, <2 x i32>* %ptr2
- %ptr3 = getelementptr <2 x i32>, <2 x i32>* %a, i32 3
- %val3 = load <2 x i32>, <2 x i32>* %ptr3
+ %val0 = load <2 x i32>, ptr %a
+ %ptr1 = getelementptr <2 x i32>, ptr %a, i32 1
+ %val1 = load <2 x i32>, ptr %ptr1
+ %ptr2 = getelementptr <2 x i32>, ptr %a, i32 2
+ %val2 = load <2 x i32>, ptr %ptr2
+ %ptr3 = getelementptr <2 x i32>, ptr %a, i32 3
+ %val3 = load <2 x i32>, ptr %ptr3
%t0 = add <2 x i32> %val0, %val1
%t1 = add <2 x i32> %t0, %val2
%t2 = add <2 x i32> %t1, %val3
- store <2 x i32> %t2, <2 x i32>* %a
+ store <2 x i32> %t2, ptr %a
ret void
}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-define void @one(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+define void @one(i64 %a, i64 %b, ptr %p1, ptr %p2) {
; CHECK: cvt.s64.s8
; CHECK: cvt.s64.s8
entry:
%shr = ashr i64 %a, 16
%shr9 = ashr i64 %b, 16
%add = add nsw i64 %conv4, %conv1
- store i64 %add, i64* %p1, align 8
+ store i64 %add, ptr %p1, align 8
%add17 = add nsw i64 %shr9, %shr
- store i64 %add17, i64* %p2, align 8
+ store i64 %add17, ptr %p2, align 8
ret void
}
-define void @two(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+define void @two(i64 %a, i64 %b, ptr %p1, ptr %p2) {
entry:
; CHECK: cvt.s64.s32
; CHECK: cvt.s64.s32
%shr = ashr i64 %a, 16
%shr9 = ashr i64 %b, 16
%add = add nsw i64 %conv4, %conv1
- store i64 %add, i64* %p1, align 8
+ store i64 %add, ptr %p1, align 8
%add17 = add nsw i64 %shr9, %shr
- store i64 %add17, i64* %p2, align 8
+ store i64 %add17, ptr %p2, align 8
ret void
}
-define void @three(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+define void @three(i64 %a, i64 %b, ptr %p1, ptr %p2) {
entry:
; CHECK: cvt.s64.s16
; CHECK: cvt.s64.s16
%shr = ashr i64 %a, 16
%shr9 = ashr i64 %b, 16
%add = add nsw i64 %conv4, %conv1
- store i64 %add, i64* %p1, align 8
+ store i64 %add, ptr %p1, align 8
%add17 = add nsw i64 %shr9, %shr
- store i64 %add17, i64* %p2, align 8
+ store i64 %add17, ptr %p2, align 8
ret void
}
-define void @four(i32 %a, i32 %b, i32* %p1, i32* %p2) {
+define void @four(i32 %a, i32 %b, ptr %p1, ptr %p2) {
entry:
; CHECK: cvt.s32.s8
; CHECK: cvt.s32.s8
%shr = ashr i32 %a, 16
%shr9 = ashr i32 %b, 16
%add = add nsw i32 %conv4, %conv1
- store i32 %add, i32* %p1, align 4
+ store i32 %add, ptr %p1, align 4
%add17 = add nsw i32 %shr9, %shr
- store i32 %add17, i32* %p2, align 4
+ store i32 %add17, ptr %p2, align 4
ret void
}
-define void @five(i32 %a, i32 %b, i32* %p1, i32* %p2) {
+define void @five(i32 %a, i32 %b, ptr %p1, ptr %p2) {
entry:
; CHECK: cvt.s32.s16
; CHECK: cvt.s32.s16
%shr = ashr i32 %a, 16
%shr9 = ashr i32 %b, 16
%add = add nsw i32 %conv4, %conv1
- store i32 %add, i32* %p1, align 4
+ store i32 %add, ptr %p1, align 4
%add17 = add nsw i32 %shr9, %shr
- store i32 %add17, i32* %p2, align 4
+ store i32 %add17, ptr %p2, align 4
ret void
}
-define void @six(i16 %a, i16 %b, i16* %p1, i16* %p2) {
+define void @six(i16 %a, i16 %b, ptr %p1, ptr %p2) {
entry:
; CHECK: cvt.s16.s8
; CHECK: cvt.s16.s8
%shr = ashr i16 %a, 8
%shr9 = ashr i16 %b, 8
%add = add nsw i16 %conv4, %conv1
- store i16 %add, i16* %p1, align 4
+ store i16 %add, ptr %p1, align 4
%add17 = add nsw i16 %shr9, %shr
- store i16 %add17, i16* %p2, align 4
+ store i16 %add17, ptr %p2, align 4
ret void
}
; Try the rest of the shfl modes. Hopefully they're declared in such a way
; that if shfl.down works correctly, they also work correctly.
-define void @shfl_rest(i32 %in_i32, float %in_float, i32* %out_i32, float* %out_float) {
+define void @shfl_rest(i32 %in_i32, float %in_float, ptr %out_i32, ptr %out_float) {
; CHECK: shfl.up.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 1, 2;
%up_i32 = call i32 @llvm.nvvm.shfl.up.i32(i32 %in_i32, i32 1, i32 2)
- store i32 %up_i32, i32* %out_i32
+ store i32 %up_i32, ptr %out_i32
; CHECK: shfl.up.b32 %f{{[0-9]+}}, %f{{[0-9]+}}, 3, 4;
%up_float = call float @llvm.nvvm.shfl.up.f32(float %in_float, i32 3, i32 4)
- store float %up_float, float* %out_float
+ store float %up_float, ptr %out_float
; CHECK: shfl.bfly.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 5, 6;
%bfly_i32 = call i32 @llvm.nvvm.shfl.bfly.i32(i32 %in_i32, i32 5, i32 6)
- store i32 %bfly_i32, i32* %out_i32
+ store i32 %bfly_i32, ptr %out_i32
; CHECK: shfl.bfly.b32 %f{{[0-9]+}}, %f{{[0-9]+}}, 7, 8;
%bfly_float = call float @llvm.nvvm.shfl.bfly.f32(float %in_float, i32 7, i32 8)
- store float %bfly_float, float* %out_float
+ store float %bfly_float, ptr %out_float
; CHECK: shfl.idx.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 9, 10;
%idx_i32 = call i32 @llvm.nvvm.shfl.idx.i32(i32 %in_i32, i32 9, i32 10)
- store i32 %idx_i32, i32* %out_i32
+ store i32 %idx_i32, ptr %out_i32
; CHECK: shfl.idx.b32 %f{{[0-9]+}}, %f{{[0-9]+}}, 11, 12;
%idx_float = call float @llvm.nvvm.shfl.idx.f32(float %in_float, i32 11, i32 12)
- store float %idx_float, float* %out_float
+ store float %idx_float, ptr %out_float
ret void
}
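; The intrinsic declarations are elided from this hunk; as a reference sketch, the
; signatures implied by the calls above are:
declare i32 @llvm.nvvm.shfl.up.i32(i32, i32, i32)
declare float @llvm.nvvm.shfl.up.f32(float, i32, i32)
declare i32 @llvm.nvvm.shfl.bfly.i32(i32, i32, i32)
declare float @llvm.nvvm.shfl.bfly.f32(float, i32, i32)
declare i32 @llvm.nvvm.shfl.idx.i32(i32, i32, i32)
declare float @llvm.nvvm.shfl.idx.f32(float, i32, i32)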
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
; CHECK: shift_parts_left_128
-define void @shift_parts_left_128(i128* %val, i128* %amtptr) {
+define void @shift_parts_left_128(ptr %val, ptr %amtptr) {
; CHECK: shl.b64
; CHECK: mov.u32
; CHECK: sub.s32
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shl.b64
- %amt = load i128, i128* %amtptr
- %a = load i128, i128* %val
+ %amt = load i128, ptr %amtptr
+ %a = load i128, ptr %val
%val0 = shl i128 %a, %amt
- store i128 %val0, i128* %val
+ store i128 %val0, ptr %val
ret void
}
; CHECK: shift_parts_right_128
-define void @shift_parts_right_128(i128* %val, i128* %amtptr) {
+define void @shift_parts_right_128(ptr %val, ptr %amtptr) {
; CHECK: shr.u64
; CHECK: sub.s32
; CHECK: shl.b64
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shr.s64
- %amt = load i128, i128* %amtptr
- %a = load i128, i128* %val
+ %amt = load i128, ptr %amtptr
+ %a = load i128, ptr %val
%val0 = ashr i128 %a, %amt
- store i128 %val0, i128* %val
+ store i128 %val0, ptr %val
ret void
}
}

; CHECK: .entry kernel_func
-define void @kernel_func(float* %a) {
- %val = load float, float* %a
+define void @kernel_func(ptr %a) {
+ %val = load float, ptr %a
; CHECK: call.uni (retval0),
; CHECK: device_func,
 %mul = call float @device_func(float %val)
- store float %mul, float* %a
+ store float %mul, ptr %a
 ret void
}


!nvvm.annotations = !{!1}

-!1 = !{void (float*)* @kernel_func, !"kernel", i32 1}
+!1 = !{ptr @kernel_func, !"kernel", i32 1}
;; i8
-define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
+define void @st_global_i8(ptr addrspace(0) %ptr, i8 %a) {
; PTX32: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
- store i8 %a, i8 addrspace(0)* %ptr
+ store i8 %a, ptr addrspace(0) %ptr
ret void
}
;; i16
-define void @st_global_i16(i16 addrspace(0)* %ptr, i16 %a) {
+define void @st_global_i16(ptr addrspace(0) %ptr, i16 %a) {
; PTX32: st.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
- store i16 %a, i16 addrspace(0)* %ptr
+ store i16 %a, ptr addrspace(0) %ptr
ret void
}
;; i32
-define void @st_global_i32(i32 addrspace(0)* %ptr, i32 %a) {
+define void @st_global_i32(ptr addrspace(0) %ptr, i32 %a) {
; PTX32: st.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
- store i32 %a, i32 addrspace(0)* %ptr
+ store i32 %a, ptr addrspace(0) %ptr
ret void
}
;; i64
-define void @st_global_i64(i64 addrspace(0)* %ptr, i64 %a) {
+define void @st_global_i64(ptr addrspace(0) %ptr, i64 %a) {
; PTX32: st.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; PTX32: ret
; PTX64: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; PTX64: ret
- store i64 %a, i64 addrspace(0)* %ptr
+ store i64 %a, ptr addrspace(0) %ptr
ret void
}
;; f32
-define void @st_global_f32(float addrspace(0)* %ptr, float %a) {
+define void @st_global_f32(ptr addrspace(0) %ptr, float %a) {
; PTX32: st.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
- store float %a, float addrspace(0)* %ptr
+ store float %a, ptr addrspace(0) %ptr
ret void
}
;; f64
-define void @st_global_f64(double addrspace(0)* %ptr, double %a) {
+define void @st_global_f64(ptr addrspace(0) %ptr, double %a) {
; PTX32: st.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; PTX32: ret
; PTX64: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; PTX64: ret
- store double %a, double addrspace(0)* %ptr
+ store double %a, ptr addrspace(0) %ptr
ret void
}
%struct.StNoalign = type { [5 x i32] }
-define %struct.StNoalign @func_StNoalign(%struct.StNoalign* nocapture noundef readonly byval(%struct.StNoalign) align 4 %in) {
+define %struct.StNoalign @func_StNoalign(ptr nocapture noundef readonly byval(%struct.StNoalign) align 4 %in) {
; CHECK-LABEL: .func{{.*}}func_StNoalign
; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [func_StNoalign_param_0];
; CHECK-NOT: st.param.b32 [func_retval0+0], %r{{[0-9]+}};
; CHECK-NOT: st.param.b32 [func_retval0+12], %r{{[0-9]+}};
; CHECK: st.param.b32 [func_retval0+16], [[R1]];
; CHECK-NEXT: ret;
- %arrayidx = getelementptr inbounds %struct.StNoalign, %struct.StNoalign* %in, i32 0, i32 0, i32 0
- %1 = load i32, i32* %arrayidx, align 4
+ %1 = load i32, ptr %in, align 4
%.fca.0.4.insert = insertvalue %struct.StNoalign { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison] }, i32 %1, 0, 4
ret %struct.StNoalign %.fca.0.4.insert
}
%struct.StAlign8 = type { [5 x i32], [4 x i8] }
-define %struct.StAlign8 @func_StAlign8(%struct.StAlign8* nocapture noundef readonly byval(%struct.StAlign8) align 8 %in) {
+define %struct.StAlign8 @func_StAlign8(ptr nocapture noundef readonly byval(%struct.StAlign8) align 8 %in) {
; CHECK-LABEL: .func{{.*}}func_StAlign8
; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [func_StAlign8_param_0];
; CHECK-NOT: st.param.b32 [func_retval0+0], %r{{[0-9]+}};
; CHECK: st.param.b32 [func_retval0+16], [[R1]];
; CHECK-NOT: st.param.v4.b8 [func_retval0+20], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
; CHECK-NEXT: ret;
- %arrayidx = getelementptr inbounds %struct.StAlign8, %struct.StAlign8* %in, i32 0, i32 0, i32 0
- %1 = load i32, i32* %arrayidx, align 8
+ %1 = load i32, ptr %in, align 8
%.fca.0.4.insert = insertvalue %struct.StAlign8 { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison], [4 x i8] poison }, i32 %1, 0, 4
ret %struct.StAlign8 %.fca.0.4.insert
}
%struct.StAlign16 = type { [5 x i32], [12 x i8] }
-define %struct.StAlign16 @func_StAlign16(%struct.StAlign16* nocapture noundef readonly byval(%struct.StAlign16) align 16 %in) {
+define %struct.StAlign16 @func_StAlign16(ptr nocapture noundef readonly byval(%struct.StAlign16) align 16 %in) {
; CHECK-LABEL: .func{{.*}}func_StAlign16
; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [func_StAlign16_param_0];
; CHECK-NOT: st.param.b32 [func_retval0+0], %r{{[0-9]+}};
; CHECK-NOT: st.param.v4.b8 [func_retval0+24], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
; CHECK-NOT: st.param.v4.b8 [func_retval0+28], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
; CHECK-NEXT: ret;
- %arrayidx = getelementptr inbounds %struct.StAlign16, %struct.StAlign16* %in, i32 0, i32 0, i32 0
- %1 = load i32, i32* %arrayidx, align 16
+ %1 = load i32, ptr %in, align 16
%.fca.0.4.insert = insertvalue %struct.StAlign16 { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison], [12 x i8] poison }, i32 %1, 0, 4
ret %struct.StAlign16 %.fca.0.4.insert
}
target triple = "nvptx-unknown-cuda"
declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
-declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
-define void @foo(i64 %img, float* %red, i32 %idx) {
+define void @foo(i64 %img, ptr %red, i32 %idx) {
; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
%ret = sitofp i32 %val to float
; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
- store float %ret, float* %red
+ store float %ret, ptr %red
ret void
}
; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
-define void @bar(float* %red, i32 %idx) {
+define void @bar(ptr %red, i32 %idx) {
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
- %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+ %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
%ret = sitofp i32 %val to float
; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
- store float %ret, float* %red
+ store float %ret, ptr %red
ret void
}
!nvvm.annotations = !{!1, !2, !3}
-!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (float*, i32)* @bar, !"kernel", i32 1}
-!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @bar, !"kernel", i32 1}
+!3 = !{ptr addrspace(1) @surf0, !"surface", i32 1}
declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
; CHECK: .entry foo
-define void @foo(i64 %img, float* %red, i32 %idx) {
+define void @foo(i64 %img, ptr %red, i32 %idx) {
; CHECK: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [foo_param_0, {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
; CHECK: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
; CHECK: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
- store float %ret, float* %red
+ store float %ret, ptr %red
ret void
}
!nvvm.annotations = !{!1, !2}
-!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (i64, float*, i32)* @foo, !"rdwrimage", i32 0}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @foo, !"rdwrimage", i32 0}
target triple = "nvptx-unknown-cuda"
declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
-declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
; SM20-LABEL: .entry foo
; SM30-LABEL: .entry bar
define void @bar(i32 %val, i32 %idx) {
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
- %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+ %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
; SM20: sust.b.1d.b32.trap [surf0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val)
!nvvm.annotations = !{!1, !2, !3}
-!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (i32, i32)* @bar, !"kernel", i32 1}
-!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @bar, !"kernel", i32 1}
+!3 = !{ptr addrspace(1) @surf0, !"surface", i32 1}
}
!nvvm.annotations = !{!1, !2}
-!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (i64, i32, i32)* @foo, !"wroimage", i32 0}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @foo, !"wroimage", i32 0}
; Function Attrs: nounwind
define internal void @.function.() {
entry:
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (ptr, ...) @printf(ptr @.str)
ret void
}
; Function Attrs: nounwind
define internal void @_$_function_$_() {
entry:
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @_$_str, i32 0, i32 0))
+ %call = call i32 (ptr, ...) @printf(ptr @_$_str)
ret void
}
ret void
}
-declare i32 @printf(i8*, ...)
+declare i32 @printf(ptr, ...)
target triple = "nvptx-unknown-cuda"
declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32)
-declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
-define void @foo(i64 %img, float* %red, i32 %idx) {
+define void @foo(i64 %img, ptr %red, i32 %idx) {
; SM20: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
; SM30: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
%ret = extractvalue { float, float, float, float } %val, 0
; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
- store float %ret, float* %red
+ store float %ret, ptr %red
ret void
}
; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
-define void @bar(float* %red, i32 %idx) {
+define void @bar(ptr %red, i32 %idx) {
; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
- %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
+ %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
- store float %ret, float* %red
+ store float %ret, ptr %red
ret void
}
; SM20-LABEL: .entry baz
; SM30-LABEL: .entry baz
-define void @baz(float* %red, i32 %idx) {
+define void @baz(ptr %red, i32 %idx) {
; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
- %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
+ %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
%ret2 = fadd float %ret, %texcall
; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RET2]]
; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RET2]]
- store float %ret2, float* %red
+ store float %ret2, ptr %red
ret void
}
!nvvm.annotations = !{!1, !2, !3, !4}
-!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (float*, i32)* @bar, !"kernel", i32 1}
-!3 = !{i64 addrspace(1)* @tex0, !"texture", i32 1}
-!4 = !{void (float*, i32)* @baz, !"kernel", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @bar, !"kernel", i32 1}
+!3 = !{ptr addrspace(1) @tex0, !"texture", i32 1}
+!4 = !{ptr @baz, !"kernel", i32 1}
declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64, i64, i32)
; CHECK: .entry foo
-define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
+define void @foo(i64 %img, i64 %sampler, ptr %red, i32 %idx) {
; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64 %img, i64 %sampler, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]]
- store float %ret, float* %red
+ store float %ret, ptr %red
ret void
}
!nvvm.annotations = !{!1, !2, !3}
-!1 = !{void (i64, i64, float*, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (i64, i64, float*, i32)* @foo, !"rdoimage", i32 0}
-!3 = !{void (i64, i64, float*, i32)* @foo, !"sampler", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @foo, !"rdoimage", i32 0}
+!3 = !{ptr @foo, !"sampler", i32 1}
declare i32 @llvm.nvvm.txq.height(i64)
declare i32 @llvm.nvvm.suq.width(i64)
declare i32 @llvm.nvvm.suq.height(i64)
-declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
; SM20-LABEL: @t0
; SM30-LABEL: @t1
define i32 @t1() {
; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], tex0
- %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
+ %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
; SM20: txq.width.b32 %r{{[0-9]+}}, [tex0]
; SM30: txq.width.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
%width = tail call i32 @llvm.nvvm.txq.width(i64 %texHandle)
; SM30-LABEL: @t3
define i32 @t3() {
; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], tex0
- %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
+ %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
; SM20: txq.height.b32 %r{{[0-9]+}}, [tex0]
; SM30: txq.height.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
%height = tail call i32 @llvm.nvvm.txq.height(i64 %texHandle)
; SM30-LABEL: @s1
define i32 @s1() {
; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], surf0
- %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+ %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
; SM20: suq.width.b32 %r{{[0-9]+}}, [surf0]
; SM30: suq.width.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
%width = tail call i32 @llvm.nvvm.suq.width(i64 %surfHandle)
; SM30-LABEL: @s3
define i32 @s3() {
; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], surf0
- %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+ %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
; SM20: suq.height.b32 %r{{[0-9]+}}, [surf0]
; SM30: suq.height.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
%height = tail call i32 @llvm.nvvm.suq.height(i64 %surfHandle)
!nvvm.annotations = !{!1, !2}
-!1 = !{i64 addrspace(1)* @tex0, !"texture", i32 1}
-!2 = !{i64 addrspace(1)* @surf0, !"surface", i32 1}
+!1 = !{ptr addrspace(1) @tex0, !"texture", i32 1}
+!2 = !{ptr addrspace(1) @surf0, !"surface", i32 1}
; RUN: llc < %s -march=nvptx -mcpu=sm_20 %if ptxas %{ | %ptxas-verify %}
-define ptx_device void @test_function({i8, i8}*) {
+define ptx_device void @test_function(ptr) {
ret void
}
; CHECK: .address_size [[BITS:32|64]]
-%struct.__va_list_tag = type { i8*, i8*, i32, i32 }
+%struct.__va_list_tag = type { ptr, ptr, i32, i32 }
-@foo_ptr = internal addrspace(1) global i32 (i32, ...)* @foo, align 8
+@foo_ptr = internal addrspace(1) global ptr @foo, align 8
define i32 @foo(i32 %a, ...) {
entry:
%al = alloca [1 x %struct.__va_list_tag], align 8
- %ap = bitcast [1 x %struct.__va_list_tag]* %al to i8*
%al2 = alloca [1 x %struct.__va_list_tag], align 8
- %ap2 = bitcast [1 x %struct.__va_list_tag]* %al2 to i8*
; Test va_start
; CHECK: .param .align 8 .b8 foo_vararg[]
; CHECK: mov.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], foo_vararg;
; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR]];
- call void @llvm.va_start(i8* %ap)
+ call void @llvm.va_start(ptr %al)
; Test va_copy()
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
; CHECK-NEXT: st.u[[BITS]] [%SP+{{[0-9]+}}], [[VA_PTR]];
- call void @llvm.va_copy(i8* %ap2, i8* %ap)
+ call void @llvm.va_copy(ptr %al2, ptr %al)
; Test va_arg(ap, int32_t)
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.u32 %r{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
- %0 = va_arg i8* %ap, i32
+ %0 = va_arg ptr %al, i32
; Test va_arg(ap, int64_t)
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.u64 %rd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
- %1 = va_arg i8* %ap, i64
+ %1 = va_arg ptr %al, i64
; Test va_arg(ap, double)
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.f64 %fd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
- %2 = va_arg i8* %ap, double
+ %2 = va_arg ptr %al, double
-; Test va_arg(ap, void *)
+; Test va_arg(ap, ptr)
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
; CHECK32-NEXT: add.s32 [[VA_PTR_TMP:%r[0-9]+]], [[VA_PTR]], 3;
; CHECK64-NEXT: add.s64 [[VA_PTR_TMP:%rd[0-9]+]], [[VA_PTR]], 7;
; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.u[[BITS]] %{{(r|rd)[0-9]+}}, [[[VA_PTR_ALIGN]]];
- %3 = va_arg i8* %ap, i8*
- %call = call i32 @bar(i32 %a, i32 %0, i64 %1, double %2, i8* %3)
+ %3 = va_arg ptr %al, ptr
+ %call = call i32 @bar(i32 %a, i32 %0, i64 %1, double %2, ptr %3)
- call void @llvm.va_end(i8* %ap)
- %4 = va_arg i8* %ap2, i32
- call void @llvm.va_end(i8* %ap2)
+ call void @llvm.va_end(ptr %al)
+ %4 = va_arg ptr %al2, i32
+ call void @llvm.va_end(ptr %al2)
%5 = add i32 %call, %4
ret i32 %5
}
-define i32 @test_foo(i32 %i, i64 %l, double %d, i8* %p) {
+define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
; Test indirect variadic function call.
; Load arguments to temporary variables
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
entry:
- %ptr = load i32 (i32, ...)*, i32 (i32, ...)** addrspacecast (i32 (i32, ...)* addrspace(1)* @foo_ptr to i32 (i32, ...)**), align 8
- %call = call i32 (i32, ...) %ptr(i32 4, i32 %i, i64 %l, double %d, i8* %p)
+ %ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8
+ %call = call i32 (i32, ...) %ptr(i32 4, i32 %i, i64 %l, double %d, ptr %p)
ret i32 %call
}
-declare void @llvm.va_start(i8*)
-declare void @llvm.va_end(i8*)
-declare void @llvm.va_copy(i8*, i8*)
-declare i32 @bar(i32, i32, i64, double, i8*)
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+declare void @llvm.va_copy(ptr, ptr)
+declare i32 @bar(i32, i32, i64, double, ptr)
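; A minimal self-contained sketch (hypothetical function, not from the test) of the
; opaque-pointer form used above: the va_* intrinsics and va_arg now operate on the
; va_list alloca directly, with no i8* bitcast in between.
define i32 @va_sum2_sketch(i32 %n, ...) {
entry:
  %ap = alloca [1 x %struct.__va_list_tag], align 8
  call void @llvm.va_start(ptr %ap)
  %a = va_arg ptr %ap, i32
  %b = va_arg ptr %ap, i32
  call void @llvm.va_end(ptr %ap)
  %sum = add i32 %a, %b
  ret i32 %sum
}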
target triple = "nvptx-unknown-cuda"
; CHECK: .visible .func foo
-define void @foo(<8 x i8> %a, i8* %b) {
+define void @foo(<8 x i8> %a, ptr %b) {
; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [foo_param_0]
; CHECK-DAG: ld.param.v4.u8 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [foo_param_0+4]
; CHECK-DAG: ld.param.u32 %[[B:r[0-9]+]], [foo_param_1]
%t0 = extractelement <8 x i8> %a, i32 1
%t1 = extractelement <8 x i8> %a, i32 6
%t = add i8 %t0, %t1
- store i8 %t, i8* %b
+ store i8 %t, ptr %b
ret void
}
; scalarized. If codegen fails, then the type legalizer incorrectly
; tried to promote <2 x i1> to <2 x i8> and instruction selection failed.
-define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
- %aval = load <2 x i32>, <2 x i32>* %a
- %bval = load <2 x i32>, <2 x i32>* %b
+define void @foo(ptr %a, ptr %b, ptr %r1, ptr %r2) {
+ %aval = load <2 x i32>, ptr %a
+ %bval = load <2 x i32>, ptr %b
%res = icmp slt <2 x i32> %aval, %bval
%t1 = extractelement <2 x i1> %res, i32 0
%t2 = extractelement <2 x i1> %res, i32 1
%t1a = zext i1 %t1 to i32
%t2a = zext i1 %t2 to i32
- store i32 %t1a, i32* %r1
- store i32 %t2a, i32* %r2
+ store i32 %t1a, ptr %r1
+ store i32 %t2a, ptr %r2
ret void
}
; which will load two floats at once into scalar registers.
; CHECK-LABEL: foo
-define void @foo(<2 x float>* %a) {
+define void @foo(ptr %a) {
; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <2 x float>, <2 x float>* %a
+ %t1 = load <2 x float>, ptr %a
%t2 = fmul <2 x float> %t1, %t1
- store <2 x float> %t2, <2 x float>* %a
+ store <2 x float> %t2, ptr %a
ret void
}
; CHECK-LABEL: foo2
-define void @foo2(<4 x float>* %a) {
+define void @foo2(ptr %a) {
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <4 x float>, <4 x float>* %a
+ %t1 = load <4 x float>, ptr %a
%t2 = fmul <4 x float> %t1, %t1
- store <4 x float> %t2, <4 x float>* %a
+ store <4 x float> %t2, ptr %a
ret void
}
; CHECK-LABEL: foo3
-define void @foo3(<8 x float>* %a) {
+define void @foo3(ptr %a) {
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <8 x float>, <8 x float>* %a
+ %t1 = load <8 x float>, ptr %a
%t2 = fmul <8 x float> %t1, %t1
- store <8 x float> %t2, <8 x float>* %a
+ store <8 x float> %t2, ptr %a
ret void
}
; CHECK-LABEL: foo4
-define void @foo4(<2 x i32>* %a) {
+define void @foo4(ptr %a) {
; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <2 x i32>, <2 x i32>* %a
+ %t1 = load <2 x i32>, ptr %a
%t2 = mul <2 x i32> %t1, %t1
- store <2 x i32> %t2, <2 x i32>* %a
+ store <2 x i32> %t2, ptr %a
ret void
}
; CHECK-LABEL: foo5
-define void @foo5(<4 x i32>* %a) {
+define void @foo5(ptr %a) {
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <4 x i32>, <4 x i32>* %a
+ %t1 = load <4 x i32>, ptr %a
%t2 = mul <4 x i32> %t1, %t1
- store <4 x i32> %t2, <4 x i32>* %a
+ store <4 x i32> %t2, ptr %a
ret void
}
; CHECK-LABEL: foo6
-define void @foo6(<8 x i32>* %a) {
+define void @foo6(ptr %a) {
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <8 x i32>, <8 x i32>* %a
+ %t1 = load <8 x i32>, ptr %a
%t2 = mul <8 x i32> %t1, %t1
- store <8 x i32> %t2, <8 x i32>* %a
+ store <8 x i32> %t2, ptr %a
ret void
}
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
; CHECK-LABEL: foo_complex
-define void @foo_complex(i8* nocapture readonly align 16 dereferenceable(134217728) %alloc0) {
- %targ0.1.typed = bitcast i8* %alloc0 to [1024 x [131072 x i8]]*
+define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(134217728) %alloc0) {
%t0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
%t1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
%t2 = lshr i32 %t1, 8
%t10 = or i32 %t4, 129
%t11 = zext i32 %t10 to i64
%t20 = zext i32 %t2 to i64
- %t27 = getelementptr inbounds [1024 x [131072 x i8]], [1024 x [131072 x i8]]* %targ0.1.typed, i64 0, i64 %t20, i64 %t9
+ %t27 = getelementptr inbounds [1024 x [131072 x i8]], ptr %alloc0, i64 0, i64 %t20, i64 %t9
; CHECK: ld.v2.u8
- %t28 = load i8, i8* %t27, align 2
- %t31 = getelementptr inbounds [1024 x [131072 x i8]], [1024 x [131072 x i8]]* %targ0.1.typed, i64 0, i64 %t20, i64 %t11
- %t32 = load i8, i8* %t31, align 1
+ %t28 = load i8, ptr %t27, align 2
+ %t31 = getelementptr inbounds [1024 x [131072 x i8]], ptr %alloc0, i64 0, i64 %t20, i64 %t11
+ %t32 = load i8, ptr %t31, align 1
%t33 = icmp ult i8 %t28, %t32
%t34 = select i1 %t33, i8 %t32, i8 %t28
- store i8 %t34, i8* %t31
+ store i8 %t34, ptr %t31
; CHECK: ret
ret void
}
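; Illustrative sketch, not part of the original test: the essential pattern
; checked above, reduced to two byte loads at adjacent addresses that the
; load-store vectorizer may merge into a single ld.v2.u8. Names are
; hypothetical.
define i8 @adjacent_bytes_sketch(ptr align 2 %p) {
  ; Two i8 loads at offsets 0 and 1 from a 2-byte-aligned base.
  %lo = load i8, ptr %p, align 2
  %hip = getelementptr inbounds i8, ptr %p, i64 1
  %hi = load i8, ptr %hip, align 1
  %m = add i8 %lo, %hi
  ret i8 %m
}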
; This test makes sure that vector selects are scalarized by the type legalizer.
; If not, type legalization will fail.
-define void @foo(<2 x i32> addrspace(1)* %def_a, <2 x i32> addrspace(1)* %def_b, <2 x i32> addrspace(1)* %def_c) {
+define void @foo(ptr addrspace(1) %def_a, ptr addrspace(1) %def_b, ptr addrspace(1) %def_c) {
entry:
- %tmp4 = load <2 x i32>, <2 x i32> addrspace(1)* %def_a
- %tmp6 = load <2 x i32>, <2 x i32> addrspace(1)* %def_c
- %tmp8 = load <2 x i32>, <2 x i32> addrspace(1)* %def_b
+ %tmp4 = load <2 x i32>, ptr addrspace(1) %def_a
+ %tmp6 = load <2 x i32>, ptr addrspace(1) %def_c
+ %tmp8 = load <2 x i32>, ptr addrspace(1) %def_b
%0 = icmp sge <2 x i32> %tmp4, zeroinitializer
%cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8
- store <2 x i32> %cond, <2 x i32> addrspace(1)* %def_c
+ store <2 x i32> %cond, ptr addrspace(1) %def_c
ret void
}
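; Illustrative sketch, not part of the original test: the scalar form the
; type legalizer reduces each lane of the vector select above to. Names
; are hypothetical.
define i32 @select_lane_sketch(i32 %a, i32 %t, i32 %f) {
  ; One lane of: icmp sge <2 x i32> ...; select <2 x i1> ...
  %c = icmp sge i32 %a, 0
  %r = select i1 %c, i32 %t, i32 %f
  ret i32 %r
}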
; CHECK: .visible .func foo1
; CHECK: st.v2.f32
-define void @foo1(<2 x float> %val, <2 x float>* %ptr) {
- store <2 x float> %val, <2 x float>* %ptr
+define void @foo1(<2 x float> %val, ptr %ptr) {
+ store <2 x float> %val, ptr %ptr
ret void
}
; CHECK: .visible .func foo2
; CHECK: st.v4.f32
-define void @foo2(<4 x float> %val, <4 x float>* %ptr) {
- store <4 x float> %val, <4 x float>* %ptr
+define void @foo2(<4 x float> %val, ptr %ptr) {
+ store <4 x float> %val, ptr %ptr
ret void
}
; CHECK: .visible .func foo3
; CHECK: st.v2.u32
-define void @foo3(<2 x i32> %val, <2 x i32>* %ptr) {
- store <2 x i32> %val, <2 x i32>* %ptr
+define void @foo3(<2 x i32> %val, ptr %ptr) {
+ store <2 x i32> %val, ptr %ptr
ret void
}
; CHECK: .visible .func foo4
; CHECK: st.v4.u32
-define void @foo4(<4 x i32> %val, <4 x i32>* %ptr) {
- store <4 x i32> %val, <4 x i32>* %ptr
+define void @foo4(<4 x i32> %val, ptr %ptr) {
+ store <4 x i32> %val, ptr %ptr
ret void
}
; CHECK: ld.global.v2.f32
; CHECK: st.global.v2.f32
; CHECK: st.global.v2.f32
-define void @test1(float addrspace(1)* noalias align 8 %in, float addrspace(1)* noalias align 8 %out) {
- %in.1 = getelementptr float, float addrspace(1)* %in, i32 1
- %in.2 = getelementptr float, float addrspace(1)* %in, i32 2
- %in.3 = getelementptr float, float addrspace(1)* %in, i32 3
- %v0 = load float, float addrspace(1)* %in, align 8
- %v1 = load float, float addrspace(1)* %in.1, align 4
- %v2 = load float, float addrspace(1)* %in.2, align 8
- %v3 = load float, float addrspace(1)* %in.3, align 4
+define void @test1(ptr addrspace(1) noalias align 8 %in, ptr addrspace(1) noalias align 8 %out) {
+ %in.1 = getelementptr float, ptr addrspace(1) %in, i32 1
+ %in.2 = getelementptr float, ptr addrspace(1) %in, i32 2
+ %in.3 = getelementptr float, ptr addrspace(1) %in, i32 3
+ %v0 = load float, ptr addrspace(1) %in, align 8
+ %v1 = load float, ptr addrspace(1) %in.1, align 4
+ %v2 = load float, ptr addrspace(1) %in.2, align 8
+ %v3 = load float, ptr addrspace(1) %in.3, align 4
%sum0 = fadd float %v0, %v1
%sum1 = fadd float %v1, %v2
%sum2 = fadd float %v3, %v1
%sum3 = fadd float %v2, %v3
- %out.1 = getelementptr float, float addrspace(1)* %out, i32 1
- %out.2 = getelementptr float, float addrspace(1)* %out, i32 2
- %out.3 = getelementptr float, float addrspace(1)* %out, i32 3
- store float %sum0, float addrspace(1)* %out, align 8
- store float %sum1, float addrspace(1)* %out.1, align 4
- store float %sum2, float addrspace(1)* %out.2, align 8
- store float %sum3, float addrspace(1)* %out.3, align 4
+ %out.1 = getelementptr float, ptr addrspace(1) %out, i32 1
+ %out.2 = getelementptr float, ptr addrspace(1) %out, i32 2
+ %out.3 = getelementptr float, ptr addrspace(1) %out, i32 3
+ store float %sum0, ptr addrspace(1) %out, align 8
+ store float %sum1, ptr addrspace(1) %out.1, align 4
+ store float %sum2, ptr addrspace(1) %out.2, align 8
+ store float %sum3, ptr addrspace(1) %out.3, align 4
ret void
}
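; Illustrative sketch, not part of the original test: the 8-byte alignment
; above only permits merging the four scalar accesses into v2 operations,
; since PTX vector loads and stores must be naturally aligned to the full
; vector size. A 16-byte-aligned variant like the hypothetical one below
; could instead be emitted as single v4 accesses.
define void @aligned16_sketch(ptr addrspace(1) noalias align 16 %in, ptr addrspace(1) noalias align 16 %out) {
  %v = load <4 x float>, ptr addrspace(1) %in, align 16
  %sq = fmul <4 x float> %v, %v
  store <4 x float> %sq, ptr addrspace(1) %out, align 16
  ret void
}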
@g = common addrspace(1) global i32 zeroinitializer
define i32 @func0() {
- %val = load i32, i32 addrspace(1)* @g
+ %val = load i32, ptr addrspace(1) @g
ret i32 %val
}