target triple = "aarch64-unknown-linux-gnu"
-define void @st1d_fixed(<8 x double>* %ptr) #0 {
+define void @st1d_fixed(ptr %ptr) #0 {
; CHECK-LABEL: st1d_fixed:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%alloc = alloca [16 x double], i32 0
- %bc = bitcast [16 x double]* %alloc to <8 x double>*
- %load = load <8 x double>, <8 x double>* %bc
+ %load = load <8 x double>, ptr %alloc
%strided.vec = shufflevector <8 x double> %load, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
- store <8 x double> zeroinitializer, <8 x double>* %ptr
+ store <8 x double> zeroinitializer, ptr %ptr
ret void
}
ret <16 x i8> %res
}
-define void @ctlz_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define void @ctlz_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ctlz_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %op)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @ctlz_v64i8(<64 x i8>* %a) #0 {
+define void @ctlz_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: ctlz_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: clz z0.b, p0/m, z0.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %op)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @ctlz_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define void @ctlz_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: ctlz_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call <128 x i8> @llvm.ctlz.v128i8(<128 x i8> %op)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @ctlz_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define void @ctlz_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: ctlz_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call <256 x i8> @llvm.ctlz.v256i8(<256 x i8> %op)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @ctlz_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define void @ctlz_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ctlz_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %op)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @ctlz_v32i16(<32 x i16>* %a) #0 {
+define void @ctlz_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: ctlz_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: clz z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %op)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @ctlz_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define void @ctlz_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: ctlz_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call <64 x i16> @llvm.ctlz.v64i16(<64 x i16> %op)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @ctlz_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define void @ctlz_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: ctlz_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call <128 x i16> @llvm.ctlz.v128i16(<128 x i16> %op)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @ctlz_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define void @ctlz_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ctlz_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %op)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @ctlz_v16i32(<16 x i32>* %a) #0 {
+define void @ctlz_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: ctlz_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: clz z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %op)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @ctlz_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define void @ctlz_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: ctlz_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call <32 x i32> @llvm.ctlz.v32i32(<32 x i32> %op)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @ctlz_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define void @ctlz_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: ctlz_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call <64 x i32> @llvm.ctlz.v64i32(<64 x i32> %op)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @ctlz_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define void @ctlz_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ctlz_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %op)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @ctlz_v8i64(<8 x i64>* %a) #0 {
+define void @ctlz_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: ctlz_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: clz z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %op)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @ctlz_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define void @ctlz_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: ctlz_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> %op)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @ctlz_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define void @ctlz_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: ctlz_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call <32 x i64> @llvm.ctlz.v32i64(<32 x i64> %op)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @ctpop_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define void @ctpop_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ctpop_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: cnt z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %op)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @ctpop_v64i8(<64 x i8>* %a) #0 {
+define void @ctpop_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: ctpop_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: cnt z0.b, p0/m, z0.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %op)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @ctpop_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define void @ctpop_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: ctpop_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: cnt z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call <128 x i8> @llvm.ctpop.v128i8(<128 x i8> %op)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @ctpop_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define void @ctpop_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: ctpop_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: cnt z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call <256 x i8> @llvm.ctpop.v256i8(<256 x i8> %op)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @ctpop_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define void @ctpop_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ctpop_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %op)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @ctpop_v32i16(<32 x i16>* %a) #0 {
+define void @ctpop_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: ctpop_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: cnt z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %op)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @ctpop_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define void @ctpop_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: ctpop_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call <64 x i16> @llvm.ctpop.v64i16(<64 x i16> %op)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @ctpop_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define void @ctpop_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: ctpop_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call <128 x i16> @llvm.ctpop.v128i16(<128 x i16> %op)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @ctpop_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define void @ctpop_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ctpop_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %op)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @ctpop_v16i32(<16 x i32>* %a) #0 {
+define void @ctpop_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: ctpop_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: cnt z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %op)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @ctpop_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define void @ctpop_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: ctpop_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call <32 x i32> @llvm.ctpop.v32i32(<32 x i32> %op)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @ctpop_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define void @ctpop_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: ctpop_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call <64 x i32> @llvm.ctpop.v64i32(<64 x i32> %op)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @ctpop_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define void @ctpop_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ctpop_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: cnt z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %op)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @ctpop_v8i64(<8 x i64>* %a) #0 {
+define void @ctpop_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: ctpop_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: cnt z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %op)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @ctpop_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define void @ctpop_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: ctpop_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: cnt z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> %op)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @ctpop_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define void @ctpop_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: ctpop_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: cnt z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call <32 x i64> @llvm.ctpop.v32i64(<32 x i64> %op)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @cttz_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define void @cttz_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: cttz_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %op)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @cttz_v64i8(<64 x i8>* %a) #0 {
+define void @cttz_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: cttz_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: clz z0.b, p0/m, z0.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %op)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @cttz_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define void @cttz_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: cttz_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call <128 x i8> @llvm.cttz.v128i8(<128 x i8> %op)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @cttz_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define void @cttz_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: cttz_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call <256 x i8> @llvm.cttz.v256i8(<256 x i8> %op)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @cttz_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define void @cttz_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: cttz_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %op)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @cttz_v32i16(<32 x i16>* %a) #0 {
+define void @cttz_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: cttz_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: clz z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %op)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @cttz_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define void @cttz_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: cttz_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call <64 x i16> @llvm.cttz.v64i16(<64 x i16> %op)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @cttz_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define void @cttz_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: cttz_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call <128 x i16> @llvm.cttz.v128i16(<128 x i16> %op)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @cttz_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define void @cttz_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: cttz_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %op)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @cttz_v16i32(<16 x i32>* %a) #0 {
+define void @cttz_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: cttz_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: clz z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %op)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @cttz_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define void @cttz_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: cttz_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call <32 x i32> @llvm.cttz.v32i32(<32 x i32> %op)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @cttz_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define void @cttz_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: cttz_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call <64 x i32> @llvm.cttz.v64i32(<64 x i32> %op)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @cttz_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define void @cttz_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: cttz_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %op)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @cttz_v8i64(<8 x i64>* %a) #0 {
+define void @cttz_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: cttz_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: clz z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %op)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @cttz_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define void @cttz_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: cttz_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> %op)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @cttz_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define void @cttz_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: cttz_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call <32 x i64> @llvm.cttz.v32i64(<32 x i64> %op)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
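; NOTE: The declarations below are reconstructed from the call sites above,
; using the legacy single-argument form of ctlz/cttz that matches those calls
; (LLVM auto-upgrades it to the two-argument form). The truncated NEON-sized
; tests (v16i8, v8i16, v4i32, v2i64) would need matching declarations as well,
; and the attribute body is an assumption based on the usual SVE fixed-length
; test boilerplate.
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>)
declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>)
declare <128 x i8> @llvm.ctlz.v128i8(<128 x i8>)
declare <256 x i8> @llvm.ctlz.v256i8(<256 x i8>)
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>)
declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>)
declare <64 x i16> @llvm.ctlz.v64i16(<64 x i16>)
declare <128 x i16> @llvm.ctlz.v128i16(<128 x i16>)
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>)
declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>)
declare <32 x i32> @llvm.ctlz.v32i32(<32 x i32>)
declare <64 x i32> @llvm.ctlz.v64i32(<64 x i32>)
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>)
declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>)
declare <16 x i64> @llvm.ctlz.v16i64(<16 x i64>)
declare <32 x i64> @llvm.ctlz.v32i64(<32 x i64>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>)
declare <128 x i8> @llvm.ctpop.v128i8(<128 x i8>)
declare <256 x i8> @llvm.ctpop.v256i8(<256 x i8>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>)
declare <64 x i16> @llvm.ctpop.v64i16(<64 x i16>)
declare <128 x i16> @llvm.ctpop.v128i16(<128 x i16>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
declare <32 x i32> @llvm.ctpop.v32i32(<32 x i32>)
declare <64 x i32> @llvm.ctpop.v64i32(<64 x i32>)
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>)
declare <32 x i64> @llvm.ctpop.v32i64(<32 x i64>)
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>)
declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>)
declare <128 x i8> @llvm.cttz.v128i8(<128 x i8>)
declare <256 x i8> @llvm.cttz.v256i8(<256 x i8>)
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>)
declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>)
declare <64 x i16> @llvm.cttz.v64i16(<64 x i16>)
declare <128 x i16> @llvm.cttz.v128i16(<128 x i16>)
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>)
declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>)
declare <32 x i32> @llvm.cttz.v32i32(<32 x i32>)
declare <64 x i32> @llvm.cttz.v64i32(<64 x i32>)
declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>)
declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>)
declare <16 x i64> @llvm.cttz.v16i64(<16 x i64>)
declare <32 x i64> @llvm.cttz.v32i64(<32 x i64>)

attributes #0 = { "target-features"="+sve" }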
target triple = "aarch64-unknown-linux-gnu"
; Don't use SVE for 64-bit vectors.
-define void @bitcast_v4i16(<4 x i16> *%a, <4 x half>* %b) vscale_range(2,0) #0 {
+define void @bitcast_v4i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <4 x i16>, <4 x i16>* %a
+ %load = load volatile <4 x i16>, ptr %a
%cast = bitcast <4 x i16> %load to <4 x half>
- store volatile <4 x half> %cast, <4 x half>* %b
+ store volatile <4 x half> %cast, ptr %b
ret void
}
; Don't use SVE for 128-bit vectors.
-define void @bitcast_v8i16(<8 x i16> *%a, <8 x half>* %b) vscale_range(2,0) #0 {
+define void @bitcast_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <8 x i16>, <8 x i16>* %a
+ %load = load volatile <8 x i16>, ptr %a
%cast = bitcast <8 x i16> %load to <8 x half>
- store volatile <8 x half> %cast, <8 x half>* %b
+ store volatile <8 x half> %cast, ptr %b
ret void
}
-define void @bitcast_v16i16(<16 x i16> *%a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @bitcast_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <16 x i16>, <16 x i16>* %a
+ %load = load volatile <16 x i16>, ptr %a
%cast = bitcast <16 x i16> %load to <16 x half>
- store volatile <16 x half> %cast, <16 x half>* %b
+ store volatile <16 x half> %cast, ptr %b
ret void
}
-define void @bitcast_v32i16(<32 x i16> *%a, <32 x half>* %b) #0 {
+define void @bitcast_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: bitcast_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %load = load volatile <32 x i16>, <32 x i16>* %a
+ %load = load volatile <32 x i16>, ptr %a
%cast = bitcast <32 x i16> %load to <32 x half>
- store volatile <32 x half> %cast, <32 x half>* %b
+ store volatile <32 x half> %cast, ptr %b
ret void
}
-define void @bitcast_v64i16(<64 x i16> *%a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @bitcast_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: bitcast_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <64 x i16>, <64 x i16>* %a
+ %load = load volatile <64 x i16>, ptr %a
%cast = bitcast <64 x i16> %load to <64 x half>
- store volatile <64 x half> %cast, <64 x half>* %b
+ store volatile <64 x half> %cast, ptr %b
ret void
}
-define void @bitcast_v128i16(<128 x i16> *%a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @bitcast_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: bitcast_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <128 x i16>, <128 x i16>* %a
+ %load = load volatile <128 x i16>, ptr %a
%cast = bitcast <128 x i16> %load to <128 x half>
- store volatile <128 x half> %cast, <128 x half>* %b
+ store volatile <128 x half> %cast, ptr %b
ret void
}
; Don't use SVE for 64-bit vectors.
-define void @bitcast_v2i32(<2 x i32> *%a, <2 x float>* %b) vscale_range(2,0) #0 {
+define void @bitcast_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <2 x i32>, <2 x i32>* %a
+ %load = load volatile <2 x i32>, ptr %a
%cast = bitcast <2 x i32> %load to <2 x float>
- store volatile <2 x float> %cast, <2 x float>* %b
+ store volatile <2 x float> %cast, ptr %b
ret void
}
; Don't use SVE for 128-bit vectors.
-define void @bitcast_v4i32(<4 x i32> *%a, <4 x float>* %b) vscale_range(2,0) #0 {
+define void @bitcast_v4i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <4 x i32>, <4 x i32>* %a
+ %load = load volatile <4 x i32>, ptr %a
%cast = bitcast <4 x i32> %load to <4 x float>
- store volatile <4 x float> %cast, <4 x float>* %b
+ store volatile <4 x float> %cast, ptr %b
ret void
}
-define void @bitcast_v8i32(<8 x i32> *%a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @bitcast_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <8 x i32>, <8 x i32>* %a
+ %load = load volatile <8 x i32>, ptr %a
%cast = bitcast <8 x i32> %load to <8 x float>
- store volatile <8 x float> %cast, <8 x float>* %b
+ store volatile <8 x float> %cast, ptr %b
ret void
}
-define void @bitcast_v16i32(<16 x i32> *%a, <16 x float>* %b) #0 {
+define void @bitcast_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: bitcast_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %load = load volatile <16 x i32>, <16 x i32>* %a
+ %load = load volatile <16 x i32>, ptr %a
%cast = bitcast <16 x i32> %load to <16 x float>
- store volatile <16 x float> %cast, <16 x float>* %b
+ store volatile <16 x float> %cast, ptr %b
ret void
}
-define void @bitcast_v32i32(<32 x i32> *%a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @bitcast_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: bitcast_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <32 x i32>, <32 x i32>* %a
+ %load = load volatile <32 x i32>, ptr %a
%cast = bitcast <32 x i32> %load to <32 x float>
- store volatile <32 x float> %cast, <32 x float>* %b
+ store volatile <32 x float> %cast, ptr %b
ret void
}
-define void @bitcast_v64i32(<64 x i32> *%a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @bitcast_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: bitcast_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <64 x i32>, <64 x i32>* %a
+ %load = load volatile <64 x i32>, ptr %a
%cast = bitcast <64 x i32> %load to <64 x float>
- store volatile <64 x float> %cast, <64 x float>* %b
+ store volatile <64 x float> %cast, ptr %b
ret void
}
; Don't use SVE for 64-bit vectors.
-define void @bitcast_v1i64(<1 x i64> *%a, <1 x double>* %b) vscale_range(2,0) #0 {
+define void @bitcast_v1i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <1 x i64>, <1 x i64>* %a
+ %load = load volatile <1 x i64>, ptr %a
%cast = bitcast <1 x i64> %load to <1 x double>
- store volatile <1 x double> %cast, <1 x double>* %b
+ store volatile <1 x double> %cast, ptr %b
ret void
}
; Don't use SVE for 128-bit vectors.
-define void @bitcast_v2i64(<2 x i64> *%a, <2 x double>* %b) vscale_range(2,0) #0 {
+define void @bitcast_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <2 x i64>, <2 x i64>* %a
+ %load = load volatile <2 x i64>, ptr %a
%cast = bitcast <2 x i64> %load to <2 x double>
- store volatile <2 x double> %cast, <2 x double>* %b
+ store volatile <2 x double> %cast, ptr %b
ret void
}
-define void @bitcast_v4i64(<4 x i64> *%a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @bitcast_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: bitcast_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <4 x i64>, <4 x i64>* %a
+ %load = load volatile <4 x i64>, ptr %a
%cast = bitcast <4 x i64> %load to <4 x double>
- store volatile <4 x double> %cast, <4 x double>* %b
+ store volatile <4 x double> %cast, ptr %b
ret void
}
-define void @bitcast_v8i64(<8 x i64> *%a, <8 x double>* %b) #0 {
+define void @bitcast_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: bitcast_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %load = load volatile <8 x i64>, <8 x i64>* %a
+ %load = load volatile <8 x i64>, ptr %a
%cast = bitcast <8 x i64> %load to <8 x double>
- store volatile <8 x double> %cast, <8 x double>* %b
+ store volatile <8 x double> %cast, ptr %b
ret void
}
-define void @bitcast_v16i64(<16 x i64> *%a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @bitcast_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: bitcast_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <16 x i64>, <16 x i64>* %a
+ %load = load volatile <16 x i64>, ptr %a
%cast = bitcast <16 x i64> %load to <16 x double>
- store volatile <16 x double> %cast, <16 x double>* %b
+ store volatile <16 x double> %cast, ptr %b
ret void
}
-define void @bitcast_v32i64(<32 x i64> *%a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @bitcast_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: bitcast_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %load = load volatile <32 x i64>, <32 x i64>* %a
+ %load = load volatile <32 x i64>, ptr %a
%cast = bitcast <32 x i64> %load to <32 x double>
- store volatile <32 x double> %cast, <32 x double>* %b
+ store volatile <32 x double> %cast, ptr %b
ret void
}
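; NOTE: attribute group not present in the truncated source; the body below is
; an assumption matching the convention of the other SVE fixed-length tests.
attributes #0 = { "target-features"="+sve" }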
; Once this is implemented, this test will be fleshed out.
;
-define <8 x i32> @fixed_bitselect_v8i32(<8 x i32>* %pre_cond_ptr, <8 x i32>* %left_ptr, <8 x i32>* %right_ptr) #0 {
+define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_ptr) #0 {
; CHECK-LABEL: fixed_bitselect_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: orr z0.d, z1.d, z0.d
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %pre_cond = load <8 x i32>, <8 x i32>* %pre_cond_ptr
- %left = load <8 x i32>, <8 x i32>* %left_ptr
- %right = load <8 x i32>, <8 x i32>* %right_ptr
+ %pre_cond = load <8 x i32>, ptr %pre_cond_ptr
+ %left = load <8 x i32>, ptr %left_ptr
+ %right = load <8 x i32>, ptr %right_ptr
%neg_cond = sub <8 x i32> zeroinitializer, %pre_cond
%min_cond = add <8 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
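  ; NOTE: the tail of this function was truncated. The lines below are a
  ; hedged reconstruction of the classic bitselect pattern the setup above
  ; implies, (%left & -%pre_cond) | (%right & (%pre_cond - 1)), which also
  ; matches the final orr in the CHECK lines; the value names are assumptions.
  %left_bits_0 = and <8 x i32> %neg_cond, %left
  %right_bits_0 = and <8 x i32> %min_cond, %right
  %bsl0000 = or <8 x i32> %right_bits_0, %left_bits_0
  ret <8 x i32> %bsl0000
}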
ret <16 x i8> %res
}
-define void @concat_v32i8(<16 x i8>* %a, <16 x i8>* %b, <32 x i8>* %c) vscale_range(2,0) #0 {
+define void @concat_v32i8(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: st1b { z1.b }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x i8>, <16 x i8>* %a
- %op2 = load <16 x i8>, <16 x i8>* %b
+ %op1 = load <16 x i8>, ptr %a
+ %op2 = load <16 x i8>, ptr %b
%res = shufflevector <16 x i8> %op1, <16 x i8> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- store <32 x i8> %res, <32 x i8>* %c
+ store <32 x i8> %res, ptr %c
ret void
}
-define void @concat_v64i8(<32 x i8>* %a, <32 x i8>* %b, <64 x i8>* %c) #0 {
+define void @concat_v64i8(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: concat_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = shufflevector <32 x i8> %op1, <32 x i8> %op2, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47,
i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55,
i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
- store <64 x i8> %res, <64 x i8>* %c
+ store <64 x i8> %res, ptr %c
ret void
}
-define void @concat_v128i8(<64 x i8>* %a, <64 x i8>* %b, <128 x i8>* %c) vscale_range(8,0) #0 {
+define void @concat_v128i8(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: concat_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: st1b { z0.b }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = shufflevector <64 x i8> %op1, <64 x i8> %op2, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111,
i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119,
i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
- store <128 x i8> %res, <128 x i8>* %c
+ store <128 x i8> %res, ptr %c
ret void
}
-define void @concat_v256i8(<128 x i8>* %a, <128 x i8>* %b, <256 x i8>* %c) vscale_range(16,0) #0 {
+define void @concat_v256i8(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: concat_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: st1b { z0.b }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = shufflevector <128 x i8> %op1, <128 x i8> %op2, <256 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 232, i32 233, i32 234, i32 235, i32 236, i32 237, i32 238, i32 239,
i32 240, i32 241, i32 242, i32 243, i32 244, i32 245, i32 246, i32 247,
i32 248, i32 249, i32 250, i32 251, i32 252, i32 253, i32 254, i32 255>
- store <256 x i8> %res, <256 x i8>* %c
+ store <256 x i8> %res, ptr %c
ret void
}
ret <8 x i16> %res
}
-define void @concat_v16i16(<8 x i16>* %a, <8 x i16>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @concat_v16i16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: st1h { z1.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <8 x i16>, <8 x i16>* %a
- %op2 = load <8 x i16>, <8 x i16>* %b
+ %op1 = load <8 x i16>, ptr %a
+ %op2 = load <8 x i16>, ptr %b
%res = shufflevector <8 x i16> %op1, <8 x i16> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- store <16 x i16> %res, <16 x i16>* %c
+ store <16 x i16> %res, ptr %c
ret void
}
-define void @concat_v32i16(<16 x i16>* %a, <16 x i16>* %b, <32 x i16>* %c) #0 {
+define void @concat_v32i16(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: concat_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = shufflevector <16 x i16> %op1, <16 x i16> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- store <32 x i16> %res, <32 x i16>* %c
+ store <32 x i16> %res, ptr %c
ret void
}
-define void @concat_v64i16(<32 x i16>* %a, <32 x i16>* %b, <64 x i16>* %c) vscale_range(8,0) #0 {
+define void @concat_v64i16(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: concat_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = shufflevector <32 x i16> %op1, <32 x i16> %op2, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47,
i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55,
i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
- store <64 x i16> %res, <64 x i16>* %c
+ store <64 x i16> %res, ptr %c
ret void
}
-define void @concat_v128i16(<64 x i16>* %a, <64 x i16>* %b, <128 x i16>* %c) vscale_range(16,0) #0 {
+define void @concat_v128i16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: concat_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = shufflevector <64 x i16> %op1, <64 x i16> %op2, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111,
i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119,
i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
- store <128 x i16> %res, <128 x i16>* %c
+ store <128 x i16> %res, ptr %c
ret void
}
ret <4 x i32> %res
}
-define void @concat_v8i32(<4 x i32>* %a, <4 x i32>* %b, <8 x i32>* %c) vscale_range(2,0) #0 {
+define void @concat_v8i32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: st1w { z1.s }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <4 x i32>, <4 x i32>* %a
- %op2 = load <4 x i32>, <4 x i32>* %b
+ %op1 = load <4 x i32>, ptr %a
+ %op2 = load <4 x i32>, ptr %b
%res = shufflevector <4 x i32> %op1, <4 x i32> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- store <8 x i32> %res, <8 x i32>* %c
+ store <8 x i32> %res, ptr %c
ret void
}
-define void @concat_v16i32(<8 x i32>* %a, <8 x i32>* %b, <16 x i32>* %c) #0 {
+define void @concat_v16i32(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: concat_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = shufflevector <8 x i32> %op1, <8 x i32> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- store <16 x i32> %res, <16 x i32>* %c
+ store <16 x i32> %res, ptr %c
ret void
}
-define void @concat_v32i32(<16 x i32>* %a, <16 x i32>* %b, <32 x i32>* %c) vscale_range(8,0) #0 {
+define void @concat_v32i32(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: concat_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = shufflevector <16 x i32> %op1, <16 x i32> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- store <32 x i32> %res, <32 x i32>* %c
+ store <32 x i32> %res, ptr %c
ret void
}
-define void @concat_v64i32(<32 x i32>* %a, <32 x i32>* %b, <64 x i32>* %c) vscale_range(16,0) #0 {
+define void @concat_v64i32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: concat_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = shufflevector <32 x i32> %op1, <32 x i32> %op2, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47,
i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55,
i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
- store <64 x i32> %res, <64 x i32>* %c
+ store <64 x i32> %res, ptr %c
ret void
}
ret <2 x i64> %res
}
-define void @concat_v4i64(<2 x i64>* %a, <2 x i64>* %b, <4 x i64>* %c) vscale_range(2,0) #0 {
+define void @concat_v4i64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: st1d { z1.d }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <2 x i64>, <2 x i64>* %a
- %op2 = load <2 x i64>, <2 x i64>* %b
+ %op1 = load <2 x i64>, ptr %a
+ %op2 = load <2 x i64>, ptr %b
%res = shufflevector <2 x i64> %op1, <2 x i64> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- store <4 x i64> %res, <4 x i64>* %c
+ store <4 x i64> %res, ptr %c
ret void
}
-define void @concat_v8i64(<4 x i64>* %a, <4 x i64>* %b, <8 x i64>* %c) #0 {
+define void @concat_v8i64(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: concat_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = shufflevector <4 x i64> %op1, <4 x i64> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- store <8 x i64> %res, <8 x i64>* %c
+ store <8 x i64> %res, ptr %c
ret void
}
-define void @concat_v16i64(<8 x i64>* %a, <8 x i64>* %b, <16 x i64>* %c) vscale_range(8,0) #0 {
+define void @concat_v16i64(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: concat_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = shufflevector <8 x i64> %op1, <8 x i64> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- store <16 x i64> %res, <16 x i64>* %c
+ store <16 x i64> %res, ptr %c
ret void
}
-define void @concat_v32i64(<16 x i64>* %a, <16 x i64>* %b, <32 x i64>* %c) vscale_range(16,0) #0 {
+define void @concat_v32i64(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: concat_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = shufflevector <16 x i64> %op1, <16 x i64> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- store <32 x i64> %res, <32 x i64>* %c
+ store <32 x i64> %res, ptr %c
ret void
}
ret <8 x half> %res
}
-define void @concat_v16f16(<8 x half>* %a, <8 x half>* %b, <16 x half>* %c) vscale_range(2,0) #0 {
+define void @concat_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: st1h { z1.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <8 x half>, <8 x half>* %a
- %op2 = load <8 x half>, <8 x half>* %b
+ %op1 = load <8 x half>, ptr %a
+ %op2 = load <8 x half>, ptr %b
%res = shufflevector <8 x half> %op1, <8 x half> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- store <16 x half> %res, <16 x half>* %c
+ store <16 x half> %res, ptr %c
ret void
}
-define void @concat_v32f16(<16 x half>* %a, <16 x half>* %b, <32 x half>* %c) #0 {
+define void @concat_v32f16(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: concat_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%res = shufflevector <16 x half> %op1, <16 x half> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- store <32 x half> %res, <32 x half>* %c
+ store <32 x half> %res, ptr %c
ret void
}
-define void @concat_v64f16(<32 x half>* %a, <32 x half>* %b, <64 x half>* %c) vscale_range(8,0) #0 {
+define void @concat_v64f16(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: concat_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = shufflevector <32 x half> %op1, <32 x half> %op2, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47,
i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55,
i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
- store <64 x half> %res, <64 x half>* %c
+ store <64 x half> %res, ptr %c
ret void
}
-define void @concat_v128f16(<64 x half>* %a, <64 x half>* %b, <128 x half>* %c) vscale_range(16,0) #0 {
+define void @concat_v128f16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: concat_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%res = shufflevector <64 x half> %op1, <64 x half> %op2, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111,
i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119,
i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
- store <128 x half> %res, <128 x half>* %c
+ store <128 x half> %res, ptr %c
ret void
}
ret <4 x float> %res
}
-define void @concat_v8f32(<4 x float>* %a, <4 x float>* %b, <8 x float>* %c) vscale_range(2,0) #0 {
+define void @concat_v8f32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: st1w { z1.s }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <4 x float>, <4 x float>* %a
- %op2 = load <4 x float>, <4 x float>* %b
+ %op1 = load <4 x float>, ptr %a
+ %op2 = load <4 x float>, ptr %b
%res = shufflevector <4 x float> %op1, <4 x float> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- store <8 x float> %res, <8 x float>* %c
+ store <8 x float> %res, ptr %c
ret void
}
-define void @concat_v16f32(<8 x float>* %a, <8 x float>* %b, <16 x float>* %c) #0 {
+define void @concat_v16f32(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: concat_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%res = shufflevector <8 x float> %op1, <8 x float> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- store <16 x float> %res, <16 x float>* %c
+ store <16 x float> %res, ptr %c
ret void
}
-define void @concat_v32f32(<16 x float>* %a, <16 x float>* %b, <32 x float>* %c) vscale_range(8,0) #0 {
+define void @concat_v32f32(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: concat_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = shufflevector <16 x float> %op1, <16 x float> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- store <32 x float> %res, <32 x float>* %c
+ store <32 x float> %res, ptr %c
ret void
}
-define void @concat_v64f32(<32 x float>* %a, <32 x float>* %b, <64 x float>* %c) vscale_range(16,0) #0 {
+define void @concat_v64f32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: concat_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%res = shufflevector <32 x float> %op1, <32 x float> %op2, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39,
i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47,
i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55,
i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
- store <64 x float> %res, <64 x float>* %c
+ store <64 x float> %res, ptr %c
ret void
}
ret <2 x double> %res
}
-define void @concat_v4f64(<2 x double>* %a, <2 x double>* %b, <4 x double>* %c) vscale_range(2,0) #0 {
+define void @concat_v4f64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: st1d { z1.d }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <2 x double>, <2 x double>* %a
- %op2 = load <2 x double>, <2 x double>* %b
+ %op1 = load <2 x double>, ptr %a
+ %op2 = load <2 x double>, ptr %b
%res = shufflevector <2 x double> %op1, <2 x double> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- store <4 x double> %res, <4 x double>* %c
+ store <4 x double> %res, ptr %c
ret void
}
-define void @concat_v8f64(<4 x double>* %a, <4 x double>* %b, <8 x double>* %c) #0 {
+define void @concat_v8f64(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: concat_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%res = shufflevector <4 x double> %op1, <4 x double> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- store <8 x double> %res, <8 x double>* %c
+ store <8 x double> %res, ptr %c
ret void
}
-define void @concat_v16f64(<8 x double>* %a, <8 x double>* %b, <16 x double>* %c) vscale_range(8,0) #0 {
+define void @concat_v16f64(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: concat_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = shufflevector <8 x double> %op1, <8 x double> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- store <16 x double> %res, <16 x double>* %c
+ store <16 x double> %res, ptr %c
ret void
}
-define void @concat_v32f64(<16 x double>* %a, <16 x double>* %b, <32 x double>* %c) vscale_range(16,0) #0 {
+define void @concat_v32f64(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: concat_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%res = shufflevector <16 x double> %op1, <16 x double> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- store <32 x double> %res, <32 x double>* %c
+ store <32 x double> %res, ptr %c
ret void
}
; undef: concats where the second operand (and so the high half of the result) is undef.
;
-define void @concat_v32i8_undef(<16 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @concat_v32i8_undef(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v32i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i8>, <16 x i8>* %a
+ %op1 = load <16 x i8>, ptr %a
%res = shufflevector <16 x i8> %op1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- store <32 x i8> %res, <32 x i8>* %b
+ store <32 x i8> %res, ptr %b
ret void
}
-define void @concat_v16i16_undef(<8 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @concat_v16i16_undef(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v16i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x i16>, <8 x i16>* %a
+ %op1 = load <8 x i16>, ptr %a
%res = shufflevector <8 x i16> %op1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- store <16 x i16> %res, <16 x i16>* %b
+ store <16 x i16> %res, ptr %b
ret void
}
-define void @concat_v8i32_undef(<4 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @concat_v8i32_undef(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v8i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x i32>, <4 x i32>* %a
+ %op1 = load <4 x i32>, ptr %a
%res = shufflevector <4 x i32> %op1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- store <8 x i32> %res, <8 x i32>* %b
+ store <8 x i32> %res, ptr %b
ret void
}
-define void @concat_v4i64_undef(<2 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @concat_v4i64_undef(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v4i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <2 x i64>, <2 x i64>* %a
+ %op1 = load <2 x i64>, ptr %a
%res = shufflevector <2 x i64> %op1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- store <4 x i64> %res, <4 x i64>* %b
+ store <4 x i64> %res, ptr %b
ret void
}
; > 2 operands: concats of more than two vectors, expressed as chained shufflevectors.
;
-define void @concat_v32i8_4op(<8 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @concat_v32i8_4op(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v32i8_4op:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x i8>, <8 x i8>* %a
+ %op1 = load <8 x i8>, ptr %a
%shuffle = shufflevector <8 x i8> %op1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%res = shufflevector <16 x i8> %shuffle, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- store <32 x i8> %res, <32 x i8>* %b
+ store <32 x i8> %res, ptr %b
ret void
}
-define void @concat_v16i16_4op(<4 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @concat_v16i16_4op(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v16i16_4op:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x i16>, <4 x i16>* %a
+ %op1 = load <4 x i16>, ptr %a
%shuffle = shufflevector <4 x i16> %op1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%res = shufflevector <8 x i16> %shuffle, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- store <16 x i16> %res, <16 x i16>* %b
+ store <16 x i16> %res, ptr %b
ret void
}
-define void @concat_v8i32_4op(<2 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @concat_v8i32_4op(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v8i32_4op:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <2 x i32>, <2 x i32>* %a
+ %op1 = load <2 x i32>, ptr %a
%shuffle = shufflevector <2 x i32> %op1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%res = shufflevector <4 x i32> %shuffle, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- store <8 x i32> %res, <8 x i32>* %b
+ store <8 x i32> %res, ptr %b
ret void
}
-define void @concat_v4i64_4op(<1 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @concat_v4i64_4op(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: concat_v4i64_4op:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <1 x i64>, <1 x i64>* %a
+ %op1 = load <1 x i64>, ptr %a
%shuffle = shufflevector <1 x i64> %op1, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
%res = shufflevector <2 x i64> %shuffle, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- store <4 x i64> %res, <4 x i64>* %b
+ store <4 x i64> %res, ptr %b
ret void
}
target triple = "aarch64-unknown-linux-gnu"
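; Tests for loads feeding zext/sext. Cases that fit a NEON register keep
; using ushll/sshll; larger cases should fold the extend into an SVE
; extending load (ld1b/ld1h/ld1w and their ld1s* signed forms).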
-define <4 x i32> @load_zext_v4i16i32(<4 x i16>* %ap) vscale_range(2,0) #0 {
+define <4 x i32> @load_zext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_zext_v4i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
- %a = load <4 x i16>, <4 x i16>* %ap
+ %a = load <4 x i16>, ptr %ap
%val = zext <4 x i16> %a to <4 x i32>
ret <4 x i32> %val
}
; Don't try to use SVE for irregular types such as i256 elements; these are
; legalised in scalar registers instead.
-define <2 x i256> @load_zext_v2i64i256(<2 x i64>* %ap) #0 {
+define <2 x i256> @load_zext_v2i64i256(ptr %ap) #0 {
; CHECK-LABEL: load_zext_v2i64i256:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: mov x7, xzr
; CHECK-NEXT: ret
- %a = load <2 x i64>, <2 x i64>* %ap
+ %a = load <2 x i64>, ptr %ap
%val = zext <2 x i64> %a to <2 x i256>
ret <2 x i256> %val
}
-define <8 x i32> @load_zext_v8i16i32(<8 x i16>* %ap) vscale_range(2,0) #0 {
+define <8 x i32> @load_zext_v8i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_zext_v8i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %a = load <8 x i16>, <8 x i16>* %ap
+ %a = load <8 x i16>, ptr %ap
%val = zext <8 x i16> %a to <8 x i32>
ret <8 x i32> %val
}
-define <16 x i32> @load_zext_v16i16i32(<16 x i16>* %ap) vscale_range(4,0) #0 {
+define <16 x i32> @load_zext_v16i16i32(ptr %ap) vscale_range(4,0) #0 {
; CHECK-LABEL: load_zext_v16i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %a = load <16 x i16>, <16 x i16>* %ap
+ %a = load <16 x i16>, ptr %ap
%val = zext <16 x i16> %a to <16 x i32>
ret <16 x i32> %val
}
-define <32 x i32> @load_zext_v32i16i32(<32 x i16>* %ap) vscale_range(8,0) #0 {
+define <32 x i32> @load_zext_v32i16i32(ptr %ap) vscale_range(8,0) #0 {
; CHECK-LABEL: load_zext_v32i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %ap
+ %a = load <32 x i16>, ptr %ap
%val = zext <32 x i16> %a to <32 x i32>
ret <32 x i32> %val
}
-define <64 x i32> @load_zext_v64i16i32(<64 x i16>* %ap) #0 {
+define <64 x i32> @load_zext_v64i16i32(ptr %ap) #0 {
; VBITS_GE_1024-LABEL: load_zext_v64i16i32:
; VBITS_GE_1024: // %bb.0:
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
; VBITS_GE_2048-NEXT: ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %a = load <64 x i16>, <64 x i16>* %ap
+ %a = load <64 x i16>, ptr %ap
%val = zext <64 x i16> %a to <64 x i32>
ret <64 x i32> %val
}
-define <4 x i32> @load_sext_v4i16i32(<4 x i16>* %ap) vscale_range(2,0) #0 {
+define <4 x i32> @load_sext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_sext_v4i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
- %a = load <4 x i16>, <4 x i16>* %ap
+ %a = load <4 x i16>, ptr %ap
%val = sext <4 x i16> %a to <4 x i32>
ret <4 x i32> %val
}
-define <8 x i32> @load_sext_v8i16i32(<8 x i16>* %ap) vscale_range(2,0) #0 {
+define <8 x i32> @load_sext_v8i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_sext_v8i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %a = load <8 x i16>, <8 x i16>* %ap
+ %a = load <8 x i16>, ptr %ap
%val = sext <8 x i16> %a to <8 x i32>
ret <8 x i32> %val
}
-define <16 x i32> @load_sext_v16i16i32(<16 x i16>* %ap) vscale_range(4,0) #0 {
+define <16 x i32> @load_sext_v16i16i32(ptr %ap) vscale_range(4,0) #0 {
; CHECK-LABEL: load_sext_v16i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %a = load <16 x i16>, <16 x i16>* %ap
+ %a = load <16 x i16>, ptr %ap
%val = sext <16 x i16> %a to <16 x i32>
ret <16 x i32> %val
}
-define <32 x i32> @load_sext_v32i16i32(<32 x i16>* %ap) vscale_range(8,0) #0 {
+define <32 x i32> @load_sext_v32i16i32(ptr %ap) vscale_range(8,0) #0 {
; CHECK-LABEL: load_sext_v32i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %ap
+ %a = load <32 x i16>, ptr %ap
%val = sext <32 x i16> %a to <32 x i32>
ret <32 x i32> %val
}
-define <64 x i32> @load_sext_v64i16i32(<64 x i16>* %ap) #0 {
+define <64 x i32> @load_sext_v64i16i32(ptr %ap) #0 {
; VBITS_GE_1024-LABEL: load_sext_v64i16i32:
; VBITS_GE_1024: // %bb.0:
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
; VBITS_GE_2048-NEXT: ld1sh { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %a = load <64 x i16>, <64 x i16>* %ap
+ %a = load <64 x i16>, ptr %ap
%val = sext <64 x i16> %a to <64 x i32>
ret <64 x i32> %val
}
-define <32 x i64> @load_zext_v32i8i64(<32 x i8>* %ap) #0 {
+define <32 x i64> @load_zext_v32i8i64(ptr %ap) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i8i64:
; VBITS_GE_1024: // %bb.0:
; VBITS_GE_1024-NEXT: ptrue p0.b, vl32
; VBITS_GE_2048-NEXT: ld1b { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %a = load <32 x i8>, <32 x i8>* %ap
+ %a = load <32 x i8>, ptr %ap
%val = zext <32 x i8> %a to <32 x i64>
ret <32 x i64> %val
}
-define <32 x i64> @load_sext_v32i8i64(<32 x i8>* %ap) #0 {
+define <32 x i64> @load_sext_v32i8i64(ptr %ap) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i8i64:
; VBITS_GE_1024: // %bb.0:
; VBITS_GE_1024-NEXT: ptrue p0.b, vl32
; VBITS_GE_2048-NEXT: ld1sb { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %a = load <32 x i8>, <32 x i8>* %ap
+ %a = load <32 x i8>, ptr %ap
%val = sext <32 x i8> %a to <32 x i64>
ret <32 x i64> %val
}
-define <32 x i64> @load_zext_v32i16i64(<32 x i16>* %ap) #0 {
+define <32 x i64> @load_zext_v32i16i64(ptr %ap) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i16i64:
; VBITS_GE_1024: // %bb.0:
; VBITS_GE_1024-NEXT: ptrue p0.h, vl32
; VBITS_GE_2048-NEXT: ld1h { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %ap
+ %a = load <32 x i16>, ptr %ap
%val = zext <32 x i16> %a to <32 x i64>
ret <32 x i64> %val
}
-define <32 x i64> @load_sext_v32i16i64(<32 x i16>* %ap) #0 {
+define <32 x i64> @load_sext_v32i16i64(ptr %ap) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i16i64:
; VBITS_GE_1024: // %bb.0:
; VBITS_GE_1024-NEXT: ptrue p0.h, vl32
; VBITS_GE_2048-NEXT: ld1sh { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %ap
+ %a = load <32 x i16>, ptr %ap
%val = sext <32 x i16> %a to <32 x i64>
ret <32 x i64> %val
}
-define <32 x i64> @load_zext_v32i32i64(<32 x i32>* %ap) #0 {
+define <32 x i64> @load_zext_v32i32i64(ptr %ap) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i32i64:
; VBITS_GE_1024: // %bb.0:
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
; VBITS_GE_2048-NEXT: ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %a = load <32 x i32>, <32 x i32>* %ap
+ %a = load <32 x i32>, ptr %ap
%val = zext <32 x i32> %a to <32 x i64>
ret <32 x i64> %val
}
-define <32 x i64> @load_sext_v32i32i64(<32 x i32>* %ap) #0 {
+define <32 x i64> @load_sext_v32i32i64(ptr %ap) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i32i64:
; VBITS_GE_1024: // %bb.0:
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
; VBITS_GE_2048-NEXT: ld1sw { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %a = load <32 x i32>, <32 x i32>* %ap
+ %a = load <32 x i32>, ptr %ap
%val = sext <32 x i32> %a to <32 x i64>
ret <32 x i64> %val
}
ret <8 x i8> %ret
}
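; Tests that extracting the high half of an in-memory vector uses EXT, with
; the byte immediate scaling with the extracted size (#16 up to #128), or an
; offset SVE load for some of the 64-bit element cases.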
-define void @extract_subvector_v32i8(<32 x i8>* %a, <16 x i8>* %b) vscale_range(2,0) #0 {
+define void @extract_subvector_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%ret = call <16 x i8> @llvm.vector.extract.v16i8.v32i8(<32 x i8> %op, i64 16)
- store <16 x i8> %ret, <16 x i8>* %b
+ store <16 x i8> %ret, ptr %b
ret void
}
-define void @extract_subvector_v64i8(<64 x i8>* %a, <32 x i8>* %b) #0 {
+define void @extract_subvector_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: extract_subvector_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: ext z0.b, z0.b, z0.b, #32
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%ret = call <32 x i8> @llvm.vector.extract.v32i8.v64i8(<64 x i8> %op, i64 32)
- store <32 x i8> %ret, <32 x i8>* %b
+ store <32 x i8> %ret, ptr %b
ret void
}
-define void @extract_subvector_v128i8(<128 x i8>* %a, <64 x i8>* %b) vscale_range(8,0) #0 {
+define void @extract_subvector_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: extract_subvector_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%ret = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %op, i64 64)
- store <64 x i8> %ret, <64 x i8>* %b
+ store <64 x i8> %ret, ptr %b
ret void
}
-define void @extract_subvector_v256i8(<256 x i8>* %a, <128 x i8>* %b) vscale_range(16,0) #0 {
+define void @extract_subvector_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: extract_subvector_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%ret = call <128 x i8> @llvm.vector.extract.v128i8.v256i8(<256 x i8> %op, i64 128)
- store <128 x i8> %ret, <128 x i8>* %b
+ store <128 x i8> %ret, ptr %b
ret void
}
ret <4 x i16> %ret
}
-define void @extract_subvector_v16i16(<16 x i16>* %a, <8 x i16>* %b) vscale_range(2,0) #0 {
+define void @extract_subvector_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%ret = call <8 x i16> @llvm.vector.extract.v8i16.v16i16(<16 x i16> %op, i64 8)
- store <8 x i16> %ret, <8 x i16>* %b
+ store <8 x i16> %ret, ptr %b
ret void
}
-define void @extract_subvector_v32i16(<32 x i16>* %a, <16 x i16>* %b) #0 {
+define void @extract_subvector_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: extract_subvector_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: ext z0.b, z0.b, z0.b, #32
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%ret = call <16 x i16> @llvm.vector.extract.v16i16.v32i16(<32 x i16> %op, i64 16)
- store <16 x i16> %ret, <16 x i16>* %b
+ store <16 x i16> %ret, ptr %b
ret void
}
-define void @extract_subvector_v64i16(<64 x i16>* %a, <32 x i16>* %b) vscale_range(8,0) #0 {
+define void @extract_subvector_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: extract_subvector_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%ret = call <32 x i16> @llvm.vector.extract.v32i16.v64i16(<64 x i16> %op, i64 32)
- store <32 x i16> %ret, <32 x i16>* %b
+ store <32 x i16> %ret, ptr %b
ret void
}
-define void @extract_subvector_v128i16(<128 x i16>* %a, <64 x i16>* %b) vscale_range(16,0) #0 {
+define void @extract_subvector_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: extract_subvector_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%ret = call <64 x i16> @llvm.vector.extract.v64i16.v128i16(<128 x i16> %op, i64 64)
- store <64 x i16> %ret, <64 x i16>* %b
+ store <64 x i16> %ret, ptr %b
ret void
}
ret <2 x i32> %ret
}
-define void @extract_subvector_v8i32(<8 x i32>* %a, <4 x i32>* %b) vscale_range(2,0) #0 {
+define void @extract_subvector_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%ret = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %op, i64 4)
- store <4 x i32> %ret, <4 x i32>* %b
+ store <4 x i32> %ret, ptr %b
ret void
}
-define void @extract_subvector_v16i32(<16 x i32>* %a, <8 x i32>* %b) #0 {
+define void @extract_subvector_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: extract_subvector_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: ext z0.b, z0.b, z0.b, #32
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%ret = call <8 x i32> @llvm.vector.extract.v8i32.v16i32(<16 x i32> %op, i64 8)
- store <8 x i32> %ret, <8 x i32>* %b
+ store <8 x i32> %ret, ptr %b
ret void
}
-define void @extract_subvector_v32i32(<32 x i32>* %a, <16 x i32>* %b) vscale_range(8,0) #0 {
+define void @extract_subvector_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: extract_subvector_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%ret = call <16 x i32> @llvm.vector.extract.v16i32.v32i32(<32 x i32> %op, i64 16)
- store <16 x i32> %ret, <16 x i32>* %b
+ store <16 x i32> %ret, ptr %b
ret void
}
-define void @extract_subvector_v64i32(<64 x i32>* %a, <32 x i32>* %b) vscale_range(16,0) #0 {
+define void @extract_subvector_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: extract_subvector_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%ret = call <32 x i32> @llvm.vector.extract.v32i32.v64i32(<64 x i32> %op, i64 32)
- store <32 x i32> %ret, <32 x i32>* %b
+ store <32 x i32> %ret, ptr %b
ret void
}
ret <1 x i64> %ret
}
-define void @extract_subvector_v4i64(<4 x i64>* %a, <2 x i64>* %b) vscale_range(2,0) #0 {
+define void @extract_subvector_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%ret = call <2 x i64> @llvm.vector.extract.v2i64.v4i64(<4 x i64> %op, i64 2)
- store <2 x i64> %ret, <2 x i64>* %b
+ store <2 x i64> %ret, ptr %b
ret void
}
-define void @extract_subvector_v8i64(<8 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @extract_subvector_v8i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%ret = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> %op, i64 4)
- store <4 x i64> %ret, <4 x i64>* %b
+ store <4 x i64> %ret, ptr %b
ret void
}
-define void @extract_subvector_v16i64(<16 x i64>* %a, <8 x i64>* %b) #0 {
+define void @extract_subvector_v16i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: extract_subvector_v16i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%ret = call <8 x i64> @llvm.vector.extract.v8i64.v16i64(<16 x i64> %op, i64 8)
- store <8 x i64> %ret, <8 x i64>* %b
+ store <8 x i64> %ret, ptr %b
ret void
}
-define void @extract_subvector_v32i64(<32 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @extract_subvector_v32i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: extract_subvector_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%ret = call <16 x i64> @llvm.vector.extract.v16i64.v32i64(<32 x i64> %op, i64 16)
- store <16 x i64> %ret, <16 x i64>* %b
+ store <16 x i64> %ret, ptr %b
ret void
}
ret <4 x half> %ret
}
-define void @extract_subvector_v16f16(<16 x half>* %a, <8 x half>* %b) vscale_range(2,0) #0 {
+define void @extract_subvector_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%ret = call <8 x half> @llvm.vector.extract.v8f16.v16f16(<16 x half> %op, i64 8)
- store <8 x half> %ret, <8 x half>* %b
+ store <8 x half> %ret, ptr %b
ret void
}
-define void @extract_subvector_v32f16(<32 x half>* %a, <16 x half>* %b) #0 {
+define void @extract_subvector_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: extract_subvector_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: ext z0.b, z0.b, z0.b, #32
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%ret = call <16 x half> @llvm.vector.extract.v16f16.v32f16(<32 x half> %op, i64 16)
- store <16 x half> %ret, <16 x half>* %b
+ store <16 x half> %ret, ptr %b
ret void
}
-define void @extract_subvector_v64f16(<64 x half>* %a, <32 x half>* %b) vscale_range(8,0) #0 {
+define void @extract_subvector_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: extract_subvector_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%ret = call <32 x half> @llvm.vector.extract.v32f16.v64f16(<64 x half> %op, i64 32)
- store <32 x half> %ret, <32 x half>* %b
+ store <32 x half> %ret, ptr %b
ret void
}
-define void @extract_subvector_v128f16(<128 x half>* %a, <64 x half>* %b) vscale_range(16,0) #0 {
+define void @extract_subvector_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: extract_subvector_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%ret = call <64 x half> @llvm.vector.extract.v64f16.v128f16(<128 x half> %op, i64 64)
- store <64 x half> %ret, <64 x half>* %b
+ store <64 x half> %ret, ptr %b
ret void
}
ret <2 x float> %ret
}
-define void @extract_subvector_v8f32(<8 x float>* %a, <4 x float>* %b) vscale_range(2,0) #0 {
+define void @extract_subvector_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%ret = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> %op, i64 4)
- store <4 x float> %ret, <4 x float>* %b
+ store <4 x float> %ret, ptr %b
ret void
}
-define void @extract_subvector_v16f32(<16 x float>* %a, <8 x float>* %b) #0 {
+define void @extract_subvector_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: extract_subvector_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: ext z0.b, z0.b, z0.b, #32
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%ret = call <8 x float> @llvm.vector.extract.v8f32.v16f32(<16 x float> %op, i64 8)
- store <8 x float> %ret, <8 x float>* %b
+ store <8 x float> %ret, ptr %b
ret void
}
-define void @extract_subvector_v32f32(<32 x float>* %a, <16 x float>* %b) vscale_range(8,0) #0 {
+define void @extract_subvector_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: extract_subvector_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%ret = call <16 x float> @llvm.vector.extract.v16f32.v32f32(<32 x float> %op, i64 16)
- store <16 x float> %ret, <16 x float>* %b
+ store <16 x float> %ret, ptr %b
ret void
}
-define void @extract_subvector_v64f32(<64 x float>* %a, <32 x float>* %b) vscale_range(16,0) #0 {
+define void @extract_subvector_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: extract_subvector_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%ret = call <32 x float> @llvm.vector.extract.v32f32.v64f32(<64 x float> %op, i64 32)
- store <32 x float> %ret, <32 x float>* %b
+ store <32 x float> %ret, ptr %b
ret void
}
ret <1 x double> %ret
}
-define void @extract_subvector_v4f64(<4 x double>* %a, <2 x double>* %b) vscale_range(2,0) #0 {
+define void @extract_subvector_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%ret = call <2 x double> @llvm.vector.extract.v2f64.v4f64(<4 x double> %op, i64 2)
- store <2 x double> %ret, <2 x double>* %b
+ store <2 x double> %ret, ptr %b
ret void
}
-define void @extract_subvector_v8f64(<8 x double>* %a, <4 x double>* %b) #0 {
+define void @extract_subvector_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: extract_subvector_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ext z0.b, z0.b, z0.b, #32
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%ret = call <4 x double> @llvm.vector.extract.v4f64.v8f64(<8 x double> %op, i64 4)
- store <4 x double> %ret, <4 x double>* %b
+ store <4 x double> %ret, ptr %b
ret void
}
-define void @extract_subvector_v16f64(<16 x double>* %a, <8 x double>* %b) vscale_range(8,0) #0 {
+define void @extract_subvector_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: extract_subvector_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%ret = call <8 x double> @llvm.vector.extract.v8f64.v16f64(<16 x double> %op, i64 8)
- store <8 x double> %ret, <8 x double>* %b
+ store <8 x double> %ret, ptr %b
ret void
}
-define void @extract_subvector_v32f64(<32 x double>* %a, <16 x double>* %b) vscale_range(16,0) #0 {
+define void @extract_subvector_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: extract_subvector_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%ret = call <16 x double> @llvm.vector.extract.v16f64.v32f64(<32 x double> %op, i64 16)
- store <16 x double> %ret, <16 x double>* %b
+ store <16 x double> %ret, ptr %b
ret void
}
ret half %r
}
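; Tests for extracting the last element. Indices that fit the lane-indexed
; MOV use a single mov from the vector lane; larger indices select the
; element with whilels followed by lastb.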
-define half @extractelement_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define half @extractelement_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, z0.h[15]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%r = extractelement <16 x half> %op1, i64 15
ret half %r
}
-define half @extractelement_v32f16(<32 x half>* %a) #0 {
+define half @extractelement_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: extractelement_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: mov z0.h, z0.h[31]
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%r = extractelement <32 x half> %op1, i64 31
ret half %r
}
-define half @extractelement_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define half @extractelement_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: extractelement_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: whilels p0.h, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
+ %op1 = load <64 x half>, ptr %a
%r = extractelement <64 x half> %op1, i64 63
ret half %r
}
-define half @extractelement_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define half @extractelement_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: extractelement_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: whilels p0.h, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
+ %op1 = load <128 x half>, ptr %a
%r = extractelement <128 x half> %op1, i64 127
ret half %r
}
ret float %r
}
-define float @extractelement_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define float @extractelement_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mov z0.s, z0.s[7]
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%r = extractelement <8 x float> %op1, i64 7
ret float %r
}
-define float @extractelement_v16f32(<16 x float>* %a) #0 {
+define float @extractelement_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: extractelement_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: mov z0.s, z0.s[15]
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%r = extractelement <16 x float> %op1, i64 15
ret float %r
}
-define float @extractelement_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define float @extractelement_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: extractelement_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%r = extractelement <32 x float> %op1, i64 31
ret float %r
}
-define float @extractelement_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define float @extractelement_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: extractelement_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
+ %op1 = load <64 x float>, ptr %a
%r = extractelement <64 x float> %op1, i64 63
ret float %r
}
ret double %r
}
-define double @extractelement_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define double @extractelement_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mov z0.d, z0.d[3]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%r = extractelement <4 x double> %op1, i64 3
ret double %r
}
-define double @extractelement_v8f64(<8 x double>* %a) #0 {
+define double @extractelement_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: extractelement_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: mov z0.d, z0.d[7]
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%r = extractelement <8 x double> %op1, i64 7
ret double %r
}
-define double @extractelement_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define double @extractelement_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: extractelement_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%r = extractelement <16 x double> %op1, i64 15
ret double %r
}
-define double @extractelement_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define double @extractelement_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: extractelement_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%r = extractelement <32 x double> %op1, i64 31
ret double %r
}
ret <8 x half> %res
}
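; Tests that fadd on vectors wider than a NEON register maps to the merging
; predicated SVE fadd, storing the result back through the first pointer.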
-define void @fadd_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fadd_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fadd_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%res = fadd <16 x half> %op1, %op2
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fadd_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @fadd_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fadd_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = fadd <32 x half> %op1, %op2
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fadd_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @fadd_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fadd_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%res = fadd <64 x half> %op1, %op2
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fadd_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @fadd_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fadd_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%res = fadd <128 x half> %op1, %op2
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fadd_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @fadd_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fadd_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%res = fadd <8 x float> %op1, %op2
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fadd_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @fadd_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fadd_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = fadd <16 x float> %op1, %op2
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fadd_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @fadd_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fadd_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%res = fadd <32 x float> %op1, %op2
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fadd_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @fadd_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fadd_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%res = fadd <64 x float> %op1, %op2
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fadd_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fadd_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fadd_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%res = fadd <4 x double> %op1, %op2
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fadd_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @fadd_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fadd_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = fadd <8 x double> %op1, %op2
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fadd_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fadd_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fadd_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%res = fadd <16 x double> %op1, %op2
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fadd_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fadd_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fadd_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%res = fadd <32 x double> %op1, %op2
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
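; Same pattern for fdiv: load both operands, apply the predicated SVE fdiv,
; and store the result back through the first pointer.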
-define void @fdiv_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fdiv_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fdiv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%res = fdiv <16 x half> %op1, %op2
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fdiv_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @fdiv_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fdiv_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = fdiv <32 x half> %op1, %op2
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fdiv_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @fdiv_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fdiv_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%res = fdiv <64 x half> %op1, %op2
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fdiv_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @fdiv_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fdiv_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%res = fdiv <128 x half> %op1, %op2
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fdiv_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @fdiv_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fdiv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%res = fdiv <8 x float> %op1, %op2
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fdiv_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @fdiv_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fdiv_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = fdiv <16 x float> %op1, %op2
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fdiv_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @fdiv_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fdiv_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%res = fdiv <32 x float> %op1, %op2
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fdiv_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @fdiv_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fdiv_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%res = fdiv <64 x float> %op1, %op2
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fdiv_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fdiv_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fdiv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%res = fdiv <4 x double> %op1, %op2
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fdiv_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @fdiv_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fdiv_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = fdiv <8 x double> %op1, %op2
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fdiv_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fdiv_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fdiv_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%res = fdiv <16 x double> %op1, %op2
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fdiv_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fdiv_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fdiv_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%res = fdiv <32 x double> %op1, %op2
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
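; Tests that llvm.fma lowers to the SVE multiply-add form (fmad), consuming
; three loaded operands and storing the result back through the first pointer.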
-define void @fma_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x half>* %c) vscale_range(2,0) #0 {
+define void @fma_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fma_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
- %op3 = load <16 x half>, <16 x half>* %c
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
+ %op3 = load <16 x half>, ptr %c
%res = call <16 x half> @llvm.fma.v16f16(<16 x half> %op1, <16 x half> %op2, <16 x half> %op3)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fma_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x half>* %c) #0 {
+define void @fma_v32f16(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fma_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
- %op3 = load <32 x half>, <32 x half>* %c
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
+ %op3 = load <32 x half>, ptr %c
%res = call <32 x half> @llvm.fma.v32f16(<32 x half> %op1, <32 x half> %op2, <32 x half> %op3)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fma_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x half>* %c) vscale_range(8,0) #0 {
+define void @fma_v64f16(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fma_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
- %op3 = load <64 x half>, <64 x half>* %c
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
+ %op3 = load <64 x half>, ptr %c
%res = call <64 x half> @llvm.fma.v64f16(<64 x half> %op1, <64 x half> %op2, <64 x half> %op3)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fma_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x half>* %c) vscale_range(16,0) #0 {
+define void @fma_v128f16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fma_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
- %op3 = load <128 x half>, <128 x half>* %c
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
+ %op3 = load <128 x half>, ptr %c
%res = call <128 x half> @llvm.fma.v128f16(<128 x half> %op1, <128 x half> %op2, <128 x half> %op3)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fma_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) vscale_range(2,0) #0 {
+define void @fma_v8f32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fma_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
- %op3 = load <8 x float>, <8 x float>* %c
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
+ %op3 = load <8 x float>, ptr %c
%res = call <8 x float> @llvm.fma.v8f32(<8 x float> %op1, <8 x float> %op2, <8 x float> %op3)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fma_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x float>* %c) #0 {
+define void @fma_v16f32(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fma_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
- %op3 = load <16 x float>, <16 x float>* %c
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
+ %op3 = load <16 x float>, ptr %c
%res = call <16 x float> @llvm.fma.v16f32(<16 x float> %op1, <16 x float> %op2, <16 x float> %op3)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fma_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x float>* %c) vscale_range(8,0) #0 {
+define void @fma_v32f32(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fma_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
- %op3 = load <32 x float>, <32 x float>* %c
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
+ %op3 = load <32 x float>, ptr %c
%res = call <32 x float> @llvm.fma.v32f32(<32 x float> %op1, <32 x float> %op2, <32 x float> %op3)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fma_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x float>* %c) vscale_range(16,0) #0 {
+define void @fma_v64f32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fma_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
- %op3 = load <64 x float>, <64 x float>* %c
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
+ %op3 = load <64 x float>, ptr %c
%res = call <64 x float> @llvm.fma.v64f32(<64 x float> %op1, <64 x float> %op2, <64 x float> %op3)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fma_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x double>* %c) vscale_range(2,0) #0 {
+define void @fma_v4f64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fma_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
- %op3 = load <4 x double>, <4 x double>* %c
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
+ %op3 = load <4 x double>, ptr %c
%res = call <4 x double> @llvm.fma.v4f64(<4 x double> %op1, <4 x double> %op2, <4 x double> %op3)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fma_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x double>* %c) #0 {
+define void @fma_v8f64(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fma_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
- %op3 = load <8 x double>, <8 x double>* %c
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
+ %op3 = load <8 x double>, ptr %c
%res = call <8 x double> @llvm.fma.v8f64(<8 x double> %op1, <8 x double> %op2, <8 x double> %op3)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fma_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x double>* %c) vscale_range(8,0) #0 {
+define void @fma_v16f64(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fma_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
- %op3 = load <16 x double>, <16 x double>* %c
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
+ %op3 = load <16 x double>, ptr %c
%res = call <16 x double> @llvm.fma.v16f64(<16 x double> %op1, <16 x double> %op2, <16 x double> %op3)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fma_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x double>* %c) vscale_range(16,0) #0 {
+define void @fma_v32f64(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fma_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
- %op3 = load <32 x double>, <32 x double>* %c
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
+ %op3 = load <32 x double>, ptr %c
%res = call <32 x double> @llvm.fma.v32f64(<32 x double> %op1, <32 x double> %op2, <32 x double> %op3)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
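; fmul (and fsub further down) maps 1:1 onto the merging-predicated SVE
; instruction: one ptrue, one predicated op, one store.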
-define void @fmul_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fmul_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmul_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%res = fmul <16 x half> %op1, %op2
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fmul_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @fmul_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fmul_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = fmul <32 x half> %op1, %op2
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fmul_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @fmul_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmul_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%res = fmul <64 x half> %op1, %op2
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fmul_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @fmul_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmul_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%res = fmul <128 x half> %op1, %op2
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fmul_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @fmul_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmul_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%res = fmul <8 x float> %op1, %op2
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fmul_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @fmul_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fmul_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = fmul <16 x float> %op1, %op2
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fmul_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @fmul_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmul_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%res = fmul <32 x float> %op1, %op2
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fmul_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @fmul_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmul_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%res = fmul <64 x float> %op1, %op2
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fmul_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fmul_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmul_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%res = fmul <4 x double> %op1, %op2
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fmul_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @fmul_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fmul_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = fmul <8 x double> %op1, %op2
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fmul_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fmul_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmul_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%res = fmul <16 x double> %op1, %op2
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fmul_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fmul_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmul_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%res = fmul <32 x double> %op1, %op2
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
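; fneg is unary; the %b parameter of the v16f16 test below is never read.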
-define void @fneg_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fneg_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fneg_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = fneg <16 x half> %op
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fneg_v32f16(<32 x half>* %a) #0 {
+define void @fneg_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fneg_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fneg z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = fneg <32 x half> %op
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fneg_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @fneg_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fneg_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = fneg <64 x half> %op
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fneg_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @fneg_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fneg_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = fneg <128 x half> %op
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fneg_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @fneg_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fneg_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = fneg <8 x float> %op
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fneg_v16f32(<16 x float>* %a) #0 {
+define void @fneg_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: fneg_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fneg z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = fneg <16 x float> %op
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fneg_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @fneg_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fneg_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = fneg <32 x float> %op
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fneg_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @fneg_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fneg_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = fneg <64 x float> %op
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fneg_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @fneg_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fneg_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fneg z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = fneg <4 x double> %op
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fneg_v8f64(<8 x double>* %a) #0 {
+define void @fneg_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: fneg_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fneg z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = fneg <8 x double> %op
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fneg_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @fneg_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fneg_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fneg z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = fneg <16 x double> %op
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fneg_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @fneg_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fneg_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fneg z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = fneg <32 x double> %op
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
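; As with fneg above, llvm.sqrt is unary and %b in the v16f16 test is unused.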
-define void @fsqrt_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fsqrt_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fsqrt_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %op)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fsqrt_v32f16(<32 x half>* %a) #0 {
+define void @fsqrt_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fsqrt_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fsqrt z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %op)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fsqrt_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @fsqrt_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fsqrt_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call <64 x half> @llvm.sqrt.v64f16(<64 x half> %op)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fsqrt_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @fsqrt_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fsqrt_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call <128 x half> @llvm.sqrt.v128f16(<128 x half> %op)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fsqrt_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @fsqrt_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fsqrt_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %op)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fsqrt_v16f32(<16 x float>* %a) #0 {
+define void @fsqrt_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: fsqrt_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fsqrt z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %op)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fsqrt_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @fsqrt_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fsqrt_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call <32 x float> @llvm.sqrt.v32f32(<32 x float> %op)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fsqrt_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @fsqrt_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fsqrt_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call <64 x float> @llvm.sqrt.v64f32(<64 x float> %op)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fsqrt_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @fsqrt_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fsqrt_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %op)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fsqrt_v8f64(<8 x double>* %a) #0 {
+define void @fsqrt_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: fsqrt_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fsqrt z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %op)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fsqrt_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @fsqrt_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fsqrt_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call <16 x double> @llvm.sqrt.v16f64(<16 x double> %op)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fsqrt_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @fsqrt_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fsqrt_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call <32 x double> @llvm.sqrt.v32f64(<32 x double> %op)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
-define void @fsub_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fsub_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fsub_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%res = fsub <16 x half> %op1, %op2
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fsub_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @fsub_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fsub_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = fsub <32 x half> %op1, %op2
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fsub_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @fsub_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fsub_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%res = fsub <64 x half> %op1, %op2
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fsub_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @fsub_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fsub_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%res = fsub <128 x half> %op1, %op2
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fsub_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @fsub_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fsub_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%res = fsub <8 x float> %op1, %op2
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fsub_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @fsub_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fsub_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = fsub <16 x float> %op1, %op2
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fsub_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @fsub_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fsub_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%res = fsub <32 x float> %op1, %op2
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fsub_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @fsub_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fsub_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%res = fsub <64 x float> %op1, %op2
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fsub_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fsub_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fsub_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%res = fsub <4 x double> %op1, %op2
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fsub_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @fsub_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fsub_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fsub z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = fsub <8 x double> %op1, %op2
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fsub_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fsub_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fsub_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%res = fsub <16 x double> %op1, %op2
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fsub_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fsub_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fsub_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%res = fsub <32 x double> %op1, %op2
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
-define void @fabs_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define void @fabs_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fabs_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.fabs.v16f16(<16 x half> %op)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fabs_v32f16(<32 x half>* %a) #0 {
+define void @fabs_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fabs_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fabs z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call <32 x half> @llvm.fabs.v32f16(<32 x half> %op)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fabs_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @fabs_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fabs_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call <64 x half> @llvm.fabs.v64f16(<64 x half> %op)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fabs_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @fabs_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fabs_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call <128 x half> @llvm.fabs.v128f16(<128 x half> %op)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fabs_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @fabs_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fabs_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fabs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.fabs.v8f32(<8 x float> %op)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fabs_v16f32(<16 x float>* %a) #0 {
+define void @fabs_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: fabs_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fabs z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call <16 x float> @llvm.fabs.v16f32(<16 x float> %op)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fabs_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @fabs_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fabs_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fabs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call <32 x float> @llvm.fabs.v32f32(<32 x float> %op)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fabs_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @fabs_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fabs_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fabs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call <64 x float> @llvm.fabs.v64f32(<64 x float> %op)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fabs_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @fabs_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fabs_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fabs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.fabs.v4f64(<4 x double> %op)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fabs_v8f64(<8 x double>* %a) #0 {
+define void @fabs_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: fabs_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fabs z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call <8 x double> @llvm.fabs.v8f64(<8 x double> %op)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fabs_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @fabs_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fabs_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fabs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call <16 x double> @llvm.fabs.v16f64(<16 x double> %op)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fabs_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @fabs_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fabs_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fabs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call <32 x double> @llvm.fabs.v32f64(<32 x double> %op)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x i16> %sext
}
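; In the compare tests the fcmp yields a predicate, and the sext from <N x i1>
; is materialised as a zeroing splat of -1 under that predicate: the
; mov z0.h, p1/z, #-1 pattern in the checks.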
-define void @fcmp_oeq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_oeq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp oeq <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
-define void @fcmp_oeq_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i16>* %c) #0 {
+define void @fcmp_oeq_v32f16(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fcmp_oeq_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%cmp = fcmp oeq <32 x half> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i16>
- store <32 x i16> %sext, <32 x i16>* %c
+ store <32 x i16> %sext, ptr %c
ret void
}
-define void @fcmp_oeq_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i16>* %c) vscale_range(8,0) #0 {
+define void @fcmp_oeq_v64f16(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fcmp_oeq_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%cmp = fcmp oeq <64 x half> %op1, %op2
%sext = sext <64 x i1> %cmp to <64 x i16>
- store <64 x i16> %sext, <64 x i16>* %c
+ store <64 x i16> %sext, ptr %c
ret void
}
-define void @fcmp_oeq_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i16>* %c) vscale_range(16,0) #0 {
+define void @fcmp_oeq_v128f16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fcmp_oeq_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%cmp = fcmp oeq <128 x half> %op1, %op2
%sext = sext <128 x i1> %cmp to <128 x i16>
- store <128 x i16> %sext, <128 x i16>* %c
+ store <128 x i16> %sext, ptr %c
ret void
}
ret <4 x i32> %sext
}
-define void @fcmp_oeq_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i32>* %c) vscale_range(2,0) #0 {
+define void @fcmp_oeq_v8f32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%cmp = fcmp oeq <8 x float> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i32>
- store <8 x i32> %sext, <8 x i32>* %c
+ store <8 x i32> %sext, ptr %c
ret void
}
-define void @fcmp_oeq_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i32>* %c) #0 {
+define void @fcmp_oeq_v16f32(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fcmp_oeq_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%cmp = fcmp oeq <16 x float> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i32>
- store <16 x i32> %sext, <16 x i32>* %c
+ store <16 x i32> %sext, ptr %c
ret void
}
-define void @fcmp_oeq_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i32>* %c) vscale_range(8,0) #0 {
+define void @fcmp_oeq_v32f32(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fcmp_oeq_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%cmp = fcmp oeq <32 x float> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i32>
- store <32 x i32> %sext, <32 x i32>* %c
+ store <32 x i32> %sext, ptr %c
ret void
}
-define void @fcmp_oeq_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i32>* %c) vscale_range(16,0) #0 {
+define void @fcmp_oeq_v64f32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fcmp_oeq_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%cmp = fcmp oeq <64 x float> %op1, %op2
%sext = sext <64 x i1> %cmp to <64 x i32>
- store <64 x i32> %sext, <64 x i32>* %c
+ store <64 x i32> %sext, ptr %c
ret void
}
ret <2 x i64> %sext
}
-define void @fcmp_oeq_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i64>* %c) vscale_range(2,0) #0 {
+define void @fcmp_oeq_v4f64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%cmp = fcmp oeq <4 x double> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i64>
- store <4 x i64> %sext, <4 x i64>* %c
+ store <4 x i64> %sext, ptr %c
ret void
}
-define void @fcmp_oeq_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i64>* %c) #0 {
+define void @fcmp_oeq_v8f64(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fcmp_oeq_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%cmp = fcmp oeq <8 x double> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i64>
- store <8 x i64> %sext, <8 x i64>* %c
+ store <8 x i64> %sext, ptr %c
ret void
}
-define void @fcmp_oeq_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i64>* %c) vscale_range(8,0) #0 {
+define void @fcmp_oeq_v16f64(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fcmp_oeq_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%cmp = fcmp oeq <16 x double> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i64>
- store <16 x i64> %sext, <16 x i64>* %c
+ store <16 x i64> %sext, ptr %c
ret void
}
-define void @fcmp_oeq_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i64>* %c) vscale_range(16,0) #0 {
+define void @fcmp_oeq_v32f64(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fcmp_oeq_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%cmp = fcmp oeq <32 x double> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i64>
- store <32 x i64> %sext, <32 x i64>* %c
+ store <32 x i64> %sext, ptr %c
ret void
}
;
; FCMP UEQ
;
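; ueq (equal or unordered) has no direct SVE compare condition, so it is built
; from two predicated compares combined into p1 before the splat; the same
; applies to one (ordered and not equal) below.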
-define void @fcmp_ueq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_ueq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ueq_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp ueq <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP ONE
;
-define void @fcmp_one_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_one_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_one_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp one <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP UNE
;
-define void @fcmp_une_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_une_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_une_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp une <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP OGT
;
-define void @fcmp_ogt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_ogt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ogt_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp ogt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP UGT
;
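; ugt has no direct SVE compare either and is emitted as the inverse of ole
; (ugt == !ole): the eor against an all-true splat flips the sign-extended
; mask. ult, uge and ule below use the same inversion.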
-define void @fcmp_ugt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_ugt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ugt_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp ugt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP OLT
;
-define void @fcmp_olt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_olt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_olt_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp olt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP ULT
;
-define void @fcmp_ult_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_ult_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ult_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp ult <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP OGE
;
-define void @fcmp_oge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_oge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oge_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp oge <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP UGE
;
-define void @fcmp_uge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_uge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_uge_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp uge <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP OLE
;
-define void @fcmp_ole_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_ole_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ole_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp ole <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP ULE
;
-define void @fcmp_ule_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_ule_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ule_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp ule <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
;
; FCMP UNO
;
-define void @fcmp_uno_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_uno_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_uno_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp uno <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
; FCMP ORD
;
-define void @fcmp_ord_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_ord_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ord_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp ord <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
; FCMP EQ
;
-define void @fcmp_eq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_eq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_eq_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp fast oeq <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
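; Note: the remaining compares carry the fast flag, which implies nnan; the
; backend may therefore assume no NaN inputs and lower eq/ne/gt/lt/ge/le as a
; single direct compare plus a mask move, with no unordered special-casing.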
; FCMP NE
;
-define void @fcmp_ne_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_ne_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ne_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp fast one <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
; FCMP GT
;
-define void @fcmp_gt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_gt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_gt_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp fast ogt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
; FCMP LT
;
-define void @fcmp_lt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_lt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_lt_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp fast olt <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
; FCMP GE
;
-define void @fcmp_ge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_ge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ge_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp fast oge <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
; FCMP LE
;
-define void @fcmp_le_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) vscale_range(2,0) #0 {
+define void @fcmp_le_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_le_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%cmp = fcmp fast ole <16 x half> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %c
+ store <16 x i16> %sext, ptr %c
ret void
}
target triple = "aarch64-unknown-linux-gnu"
; Ensure we don't crash when trying to combine fp<->int conversions
-define void @fp_convert_combine_crash(<8 x float> *%a, <8 x i32> *%b) #0 {
+define void @fp_convert_combine_crash(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fp_convert_combine_crash:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %f = load <8 x float>, <8 x float>* %a
+ %f = load <8 x float>, ptr %a
  %mul.i = fmul <8 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
%vcvt.i = fptosi <8 x float> %mul.i to <8 x i32>
- store <8 x i32> %vcvt.i, <8 x i32>* %b
+ store <8 x i32> %vcvt.i, ptr %b
ret void
}
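; (The fold being guarded against is most likely the AArch64 combine of an fmul
; by a power of two feeding fptosi into a single fcvtzs with a fractional-bits
; immediate; the fixed-length SVE path must reject or handle that fold without
; crashing, hence the plain fcvtzs in the checks above.)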
;
; Don't use SVE for 64-bit vectors.
-define void @fcvt_v2f16_v2f32(<2 x half>* %a, <2 x float>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v2f16_v2f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f16_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: fcvtl v0.4s, v0.4h
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
- %op1 = load <2 x half>, <2 x half>* %a
+ %op1 = load <2 x half>, ptr %a
%res = fpext <2 x half> %op1 to <2 x float>
- store <2 x float> %res, <2 x float>* %b
+ store <2 x float> %res, ptr %b
ret void
}
; Don't use SVE for 128-bit vectors.
-define void @fcvt_v4f16_v4f32(<4 x half>* %a, <4 x float>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v4f16_v4f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f16_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fcvtl v0.4s, v0.4h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x half>, <4 x half>* %a
+ %op1 = load <4 x half>, ptr %a
%res = fpext <4 x half> %op1 to <4 x float>
- store <4 x float> %res, <4 x float>* %b
+ store <4 x float> %res, ptr %b
ret void
}
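; Note: 64- and 128-bit inputs fit NEON d/q registers, where a single fcvtl
; widens every lane at once, so SVE predication has nothing to add for the two
; cases above.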
-define void @fcvt_v8f16_v8f32(<8 x half>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v8f16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v8f16_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x half>, <8 x half>* %a
+ %op1 = load <8 x half>, ptr %a
%res = fpext <8 x half> %op1 to <8 x float>
- store <8 x float> %res, <8 x float>* %b
+ store <8 x float> %res, ptr %b
ret void
}
-define void @fcvt_v16f16_v16f32(<16 x half>* %a, <16 x float>* %b) #0 {
+define void @fcvt_v16f16_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v16f16_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fcvt z0.s, p0/m, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%res = fpext <16 x half> %op1 to <16 x float>
- store <16 x float> %res, <16 x float>* %b
+ store <16 x float> %res, ptr %b
ret void
}
-define void @fcvt_v32f16_v32f32(<32 x half>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @fcvt_v32f16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v32f16_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%res = fpext <32 x half> %op1 to <32 x float>
- store <32 x float> %res, <32 x float>* %b
+ store <32 x float> %res, ptr %b
ret void
}
-define void @fcvt_v64f16_v64f32(<64 x half>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @fcvt_v64f16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v64f16_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
+ %op1 = load <64 x half>, ptr %a
%res = fpext <64 x half> %op1 to <64 x float>
- store <64 x float> %res, <64 x float>* %b
+ store <64 x float> %res, ptr %b
ret void
}
;
; Don't use SVE for 64-bit vectors.
-define void @fcvt_v1f16_v1f64(<1 x half>* %a, <1 x double>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v1f16_v1f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v1f16_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: fcvt d0, h0
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
- %op1 = load <1 x half>, <1 x half>* %a
+ %op1 = load <1 x half>, ptr %a
%res = fpext <1 x half> %op1 to <1 x double>
- store <1 x double> %res, <1 x double>* %b
+ store <1 x double> %res, ptr %b
ret void
}
; v2f16 is not legal for NEON, so use SVE
-define void @fcvt_v2f16_v2f64(<2 x half>* %a, <2 x double>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v2f16_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f16_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: fcvt z0.d, p0/m, z0.h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op1 = load <2 x half>, <2 x half>* %a
+ %op1 = load <2 x half>, ptr %a
%res = fpext <2 x half> %op1 to <2 x double>
- store <2 x double> %res, <2 x double>* %b
+ store <2 x double> %res, ptr %b
ret void
}
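; Note: with no legal v2f16 NEON type, the two lanes are converted with one
; predicated SVE fcvt instead of going through a pair of NEON widening steps.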
-define void @fcvt_v4f16_v4f64(<4 x half>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f16_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcvt z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x half>, <4 x half>* %a
+ %op1 = load <4 x half>, ptr %a
%res = fpext <4 x half> %op1 to <4 x double>
- store <4 x double> %res, <4 x double>* %b
+ store <4 x double> %res, ptr %b
ret void
}
-define void @fcvt_v8f16_v8f64(<8 x half>* %a, <8 x double>* %b) #0 {
+define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v8f16_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fcvt z0.d, p0/m, z0.h
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x half>, <8 x half>* %a
+ %op1 = load <8 x half>, ptr %a
%res = fpext <8 x half> %op1 to <8 x double>
- store <8 x double> %res, <8 x double>* %b
+ store <8 x double> %res, ptr %b
ret void
}
-define void @fcvt_v16f16_v16f64(<16 x half>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v16f16_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvt z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%res = fpext <16 x half> %op1 to <16 x double>
- store <16 x double> %res, <16 x double>* %b
+ store <16 x double> %res, ptr %b
ret void
}
-define void @fcvt_v32f16_v32f64(<32 x half>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fcvt_v32f16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v32f16_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvt z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%res = fpext <32 x half> %op1 to <32 x double>
- store <32 x double> %res, <32 x double>* %b
+ store <32 x double> %res, ptr %b
ret void
}
;
; Don't use SVE for 64-bit vectors.
-define void @fcvt_v1f32_v1f64(<1 x float>* %a, <1 x double>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v1f32_v1f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v1f32_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
- %op1 = load <1 x float>, <1 x float>* %a
+ %op1 = load <1 x float>, ptr %a
%res = fpext <1 x float> %op1 to <1 x double>
- store <1 x double> %res, <1 x double>* %b
+ store <1 x double> %res, ptr %b
ret void
}
; Don't use SVE for 128-bit vectors.
-define void @fcvt_v2f32_v2f64(<2 x float>* %a, <2 x double>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v2f32_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f32_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op1 = load <2 x float>, <2 x float>* %a
+ %op1 = load <2 x float>, ptr %a
%res = fpext <2 x float> %op1 to <2 x double>
- store <2 x double> %res, <2 x double>* %b
+ store <2 x double> %res, ptr %b
ret void
}
-define void @fcvt_v4f32_v4f64(<4 x float>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v4f32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f32_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x float>, <4 x float>* %a
+ %op1 = load <4 x float>, ptr %a
%res = fpext <4 x float> %op1 to <4 x double>
- store <4 x double> %res, <4 x double>* %b
+ store <4 x double> %res, ptr %b
ret void
}
-define void @fcvt_v8f32_v8f64(<8 x float>* %a, <8 x double>* %b) #0 {
+define void @fcvt_v8f32_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v8f32_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fcvt z0.d, p0/m, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%res = fpext <8 x float> %op1 to <8 x double>
- store <8 x double> %res, <8 x double>* %b
+ store <8 x double> %res, ptr %b
ret void
}
-define void @fcvt_v16f32_v16f64(<16 x float>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fcvt_v16f32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v16f32_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%res = fpext <16 x float> %op1 to <16 x double>
- store <16 x double> %res, <16 x double>* %b
+ store <16 x double> %res, ptr %b
ret void
}
-define void @fcvt_v32f32_v32f64(<32 x float>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fcvt_v32f32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v32f32_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%res = fpext <32 x float> %op1 to <32 x double>
- store <32 x double> %res, <32 x double>* %b
+ store <32 x double> %res, ptr %b
ret void
}
;
; Don't use SVE for 64-bit vectors.
-define void @fcvt_v2f32_v2f16(<2 x float>* %a, <2 x half>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v2f32_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f32_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: str s0, [x1]
; CHECK-NEXT: ret
- %op1 = load <2 x float>, <2 x float>* %a
+ %op1 = load <2 x float>, ptr %a
%res = fptrunc <2 x float> %op1 to <2 x half>
- store <2 x half> %res, <2 x half>* %b
+ store <2 x half> %res, ptr %b
ret void
}
; Don't use SVE for 128-bit vectors.
-define void @fcvt_v4f32_v4f16(<4 x float>* %a, <4 x half>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v4f32_v4f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f32_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x float>, <4 x float>* %a
+ %op1 = load <4 x float>, ptr %a
%res = fptrunc <4 x float> %op1 to <4 x half>
- store <4 x half> %res, <4 x half>* %b
+ store <4 x half> %res, ptr %b
ret void
}
-define void @fcvt_v8f32_v8f16(<8 x float>* %a, <8 x half>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v8f32_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v8f32_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%res = fptrunc <8 x float> %op1 to <8 x half>
- store <8 x half> %res, <8 x half>* %b
+ store <8 x half> %res, ptr %b
ret void
}
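; Note: fcvt z0.h, p0/m, z0.s writes each narrowed half into the low 16 bits of
; its 32-bit container, and st1h { z0.s } stores exactly that low halfword per
; active lane, so the truncating store needs no separate pack (uzp1) step.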
-define void @fcvt_v16f32_v16f16(<16 x float>* %a, <16 x half>* %b) #0 {
+define void @fcvt_v16f32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v16f32_v16f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fcvt z0.h, p0/m, z0.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%res = fptrunc <16 x float> %op1 to <16 x half>
- store <16 x half> %res, <16 x half>* %b
+ store <16 x half> %res, ptr %b
ret void
}
-define void @fcvt_v32f32_v32f16(<32 x float>* %a, <32 x half>* %b) vscale_range(8,0) #0 {
+define void @fcvt_v32f32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v32f32_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%res = fptrunc <32 x float> %op1 to <32 x half>
- store <32 x half> %res, <32 x half>* %b
+ store <32 x half> %res, ptr %b
ret void
}
-define void @fcvt_v64f32_v64f16(<64 x float>* %a, <64 x half>* %b) vscale_range(16,0) #0 {
+define void @fcvt_v64f32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v64f32_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
+ %op1 = load <64 x float>, ptr %a
%res = fptrunc <64 x float> %op1 to <64 x half>
- store <64 x half> %res, <64 x half>* %b
+ store <64 x half> %res, ptr %b
ret void
}
;
; Don't use SVE for 64-bit vectors.
-define void @fcvt_v1f64_v1f16(<1 x double>* %a, <1 x half>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v1f64_v1f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v1f64_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fcvt h0, d0
; CHECK-NEXT: str h0, [x1]
; CHECK-NEXT: ret
- %op1 = load <1 x double>, <1 x double>* %a
+ %op1 = load <1 x double>, ptr %a
%res = fptrunc <1 x double> %op1 to <1 x half>
- store <1 x half> %res, <1 x half>* %b
+ store <1 x half> %res, ptr %b
ret void
}
; v2f16 is not legal for NEON, so use SVE
-define void @fcvt_v2f64_v2f16(<2 x double>* %a, <2 x half>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v2f64_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f64_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: str s0, [x1]
; CHECK-NEXT: ret
- %op1 = load <2 x double>, <2 x double>* %a
+ %op1 = load <2 x double>, ptr %a
%res = fptrunc <2 x double> %op1 to <2 x half>
- store <2 x half> %res, <2 x half>* %b
+ store <2 x half> %res, ptr %b
ret void
}
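; Note: here the input fits in a NEON q register, so after the convert the two
; halfs are packed into adjacent .h lanes with uzp1 and written back with a
; single 32-bit scalar store.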
-define void @fcvt_v4f64_v4f16(<4 x double>* %a, <4 x half>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v4f64_v4f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f64_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%res = fptrunc <4 x double> %op1 to <4 x half>
- store <4 x half> %res, <4 x half>* %b
+ store <4 x half> %res, ptr %b
ret void
}
-define void @fcvt_v8f64_v8f16(<8 x double>* %a, <8 x half>* %b) #0 {
+define void @fcvt_v8f64_v8f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v8f64_v8f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fcvt z0.h, p0/m, z0.d
; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%res = fptrunc <8 x double> %op1 to <8 x half>
- store <8 x half> %res, <8 x half>* %b
+ store <8 x half> %res, ptr %b
ret void
}
-define void @fcvt_v16f64_v16f16(<16 x double>* %a, <16 x half>* %b) vscale_range(8,0) #0 {
+define void @fcvt_v16f64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v16f64_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%res = fptrunc <16 x double> %op1 to <16 x half>
- store <16 x half> %res, <16 x half>* %b
+ store <16 x half> %res, ptr %b
ret void
}
-define void @fcvt_v32f64_v32f16(<32 x double>* %a, <32 x half>* %b) vscale_range(16,0) #0 {
+define void @fcvt_v32f64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v32f64_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%res = fptrunc <32 x double> %op1 to <32 x half>
- store <32 x half> %res, <32 x half>* %b
+ store <32 x half> %res, ptr %b
ret void
}
;
; Don't use SVE for 64-bit vectors.
-define void @fcvt_v1f64_v1f32(<1 x double> %op1, <1 x float>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v1f64_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
%res = fptrunc <1 x double> %op1 to <1 x float>
- store <1 x float> %res, <1 x float>* %b
+ store <1 x float> %res, ptr %b
ret void
}
; Don't use SVE for 128-bit vectors.
-define void @fcvt_v2f64_v2f32(<2 x double> %op1, <2 x float>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f64_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%res = fptrunc <2 x double> %op1 to <2 x float>
- store <2 x float> %res, <2 x float>* %b
+ store <2 x float> %res, ptr %b
ret void
}
-define void @fcvt_v4f64_v4f32(<4 x double>* %a, <4 x float>* %b) vscale_range(2,0) #0 {
+define void @fcvt_v4f64_v4f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f64_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%res = fptrunc <4 x double> %op1 to <4 x float>
- store <4 x float> %res, <4 x float>* %b
+ store <4 x float> %res, ptr %b
ret void
}
-define void @fcvt_v8f64_v8f32(<8 x double>* %a, <8 x float>* %b) #0 {
+define void @fcvt_v8f64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v8f64_v8f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fcvt z0.s, p0/m, z0.d
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%res = fptrunc <8 x double> %op1 to <8 x float>
- store <8 x float> %res, <8 x float>* %b
+ store <8 x float> %res, ptr %b
ret void
}
-define void @fcvt_v16f64_v16f32(<16 x double>* %a, <16 x float>* %b) vscale_range(8,0) #0 {
+define void @fcvt_v16f64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v16f64_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%res = fptrunc <16 x double> %op1 to <16 x float>
- store <16 x float> %res, <16 x float>* %b
+ store <16 x float> %res, ptr %b
ret void
}
-define void @fcvt_v32f64_v32f32(<32 x double>* %a, <32 x float>* %b) vscale_range(16,0) #0 {
+define void @fcvt_v32f64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v32f64_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%res = fptrunc <32 x double> %op1 to <32 x float>
- store <32 x float> %res, <32 x float>* %b
+ store <32 x float> %res, ptr %b
ret void
}
ret <8 x half> %res
}
-define void @fma_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x half>* %c) vscale_range(2,0) #0 {
+define void @fma_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fma_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
- %op3 = load <16 x half>, <16 x half>* %c
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
+ %op3 = load <16 x half>, ptr %c
%mul = fmul contract <16 x half> %op1, %op2
%res = fadd contract <16 x half> %mul, %op3
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
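; Note: it is the contract flag on the fmul/fadd pair that licenses fusing them
; into the single fmad above; without it the backend would have to keep the
; multiply and add separate to preserve the intermediate rounding.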
-define void @fma_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x half>* %c) #0 {
+define void @fma_v32f16(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fma_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
- %op3 = load <32 x half>, <32 x half>* %c
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
+ %op3 = load <32 x half>, ptr %c
%mul = fmul contract <32 x half> %op1, %op2
%res = fadd contract <32 x half> %mul, %op3
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fma_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x half>* %c) vscale_range(8,0) #0 {
+define void @fma_v64f16(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fma_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
- %op3 = load <64 x half>, <64 x half>* %c
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
+ %op3 = load <64 x half>, ptr %c
%mul = fmul contract <64 x half> %op1, %op2
%res = fadd contract <64 x half> %mul, %op3
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fma_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x half>* %c) vscale_range(16,0) #0 {
+define void @fma_v128f16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fma_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
- %op3 = load <128 x half>, <128 x half>* %c
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
+ %op3 = load <128 x half>, ptr %c
%mul = fmul contract <128 x half> %op1, %op2
%res = fadd contract <128 x half> %mul, %op3
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fma_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) vscale_range(2,0) #0 {
+define void @fma_v8f32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fma_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
- %op3 = load <8 x float>, <8 x float>* %c
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
+ %op3 = load <8 x float>, ptr %c
%mul = fmul contract <8 x float> %op1, %op2
%res = fadd contract <8 x float> %mul, %op3
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fma_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x float>* %c) #0 {
+define void @fma_v16f32(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fma_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
- %op3 = load <16 x float>, <16 x float>* %c
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
+ %op3 = load <16 x float>, ptr %c
%mul = fmul contract <16 x float> %op1, %op2
%res = fadd contract <16 x float> %mul, %op3
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fma_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x float>* %c) vscale_range(8,0) #0 {
+define void @fma_v32f32(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fma_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
- %op3 = load <32 x float>, <32 x float>* %c
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
+ %op3 = load <32 x float>, ptr %c
%mul = fmul contract <32 x float> %op1, %op2
%res = fadd contract <32 x float> %mul, %op3
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fma_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x float>* %c) vscale_range(16,0) #0 {
+define void @fma_v64f32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fma_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
- %op3 = load <64 x float>, <64 x float>* %c
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
+ %op3 = load <64 x float>, ptr %c
%mul = fmul contract <64 x float> %op1, %op2
%res = fadd contract <64 x float> %mul, %op3
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fma_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x double>* %c) vscale_range(2,0) #0 {
+define void @fma_v4f64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fma_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
- %op3 = load <4 x double>, <4 x double>* %c
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
+ %op3 = load <4 x double>, ptr %c
%mul = fmul contract <4 x double> %op1, %op2
%res = fadd contract <4 x double> %mul, %op3
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fma_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x double>* %c) #0 {
+define void @fma_v8f64(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fma_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
- %op3 = load <8 x double>, <8 x double>* %c
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
+ %op3 = load <8 x double>, ptr %c
%mul = fmul contract <8 x double> %op1, %op2
%res = fadd contract <8 x double> %mul, %op3
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fma_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x double>* %c) vscale_range(8,0) #0 {
+define void @fma_v16f64(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fma_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
- %op3 = load <16 x double>, <16 x double>* %c
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
+ %op3 = load <16 x double>, ptr %c
%mul = fmul contract <16 x double> %op1, %op2
%res = fadd contract <16 x double> %mul, %op3
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fma_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x double>* %c) vscale_range(16,0) #0 {
+define void @fma_v32f64(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fma_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
- %op3 = load <32 x double>, <32 x double>* %c
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
+ %op3 = load <32 x double>, ptr %c
%mul = fmul contract <32 x double> %op1, %op2
%res = fadd contract <32 x double> %mul, %op3
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
-define void @fmaxnm_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fmaxnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%res = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %op1, <16 x half> %op2)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
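; Note: llvm.maxnum has IEEE-754 maxNum semantics (a quiet NaN operand is
; treated as missing data), which SVE's fmaxnm implements directly; the
; NaN-propagating llvm.maximum tests further down map to fmax instead.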
-define void @fmaxnm_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @fmaxnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v32f16:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = call <32 x half> @llvm.maxnum.v32f16(<32 x half> %op1, <32 x half> %op2)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fmaxnm_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @fmaxnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%res = call <64 x half> @llvm.maxnum.v64f16(<64 x half> %op1, <64 x half> %op2)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fmaxnm_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @fmaxnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%res = call <128 x half> @llvm.maxnum.v128f16(<128 x half> %op1, <128 x half> %op2)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fmaxnm_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @fmaxnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%res = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %op1, <8 x float> %op2)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fmaxnm_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @fmaxnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v16f32:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %op1, <16 x float> %op2)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fmaxnm_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @fmaxnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%res = call <32 x float> @llvm.maxnum.v32f32(<32 x float> %op1, <32 x float> %op2)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fmaxnm_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @fmaxnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%res = call <64 x float> @llvm.maxnum.v64f32(<64 x float> %op1, <64 x float> %op2)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fmaxnm_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fmaxnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%res = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %op1, <4 x double> %op2)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fmaxnm_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @fmaxnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v8f64:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %op1, <8 x double> %op2)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fmaxnm_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fmaxnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%res = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %op1, <16 x double> %op2)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fmaxnm_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fmaxnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%res = call <32 x double> @llvm.maxnum.v32f64(<32 x double> %op1, <32 x double> %op2)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
-define void @fminnm_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fminnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%res = call <16 x half> @llvm.minnum.v16f16(<16 x half> %op1, <16 x half> %op2)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fminnm_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @fminnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v32f16:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = call <32 x half> @llvm.minnum.v32f16(<32 x half> %op1, <32 x half> %op2)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fminnm_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @fminnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%res = call <64 x half> @llvm.minnum.v64f16(<64 x half> %op1, <64 x half> %op2)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fminnm_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @fminnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%res = call <128 x half> @llvm.minnum.v128f16(<128 x half> %op1, <128 x half> %op2)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fminnm_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @fminnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%res = call <8 x float> @llvm.minnum.v8f32(<8 x float> %op1, <8 x float> %op2)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fminnm_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @fminnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v16f32:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = call <16 x float> @llvm.minnum.v16f32(<16 x float> %op1, <16 x float> %op2)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fminnm_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @fminnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%res = call <32 x float> @llvm.minnum.v32f32(<32 x float> %op1, <32 x float> %op2)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fminnm_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @fminnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%res = call <64 x float> @llvm.minnum.v64f32(<64 x float> %op1, <64 x float> %op2)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fminnm_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fminnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%res = call <4 x double> @llvm.minnum.v4f64(<4 x double> %op1, <4 x double> %op2)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fminnm_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @fminnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v8f64:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = call <8 x double> @llvm.minnum.v8f64(<8 x double> %op1, <8 x double> %op2)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fminnm_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fminnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%res = call <16 x double> @llvm.minnum.v16f64(<16 x double> %op1, <16 x double> %op2)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fminnm_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fminnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%res = call <32 x double> @llvm.minnum.v32f64(<32 x double> %op1, <32 x double> %op2)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
-define void @fmax_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fmax_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%res = call <16 x half> @llvm.maximum.v16f16(<16 x half> %op1, <16 x half> %op2)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
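; Note: llvm.maximum, unlike llvm.maxnum, must propagate NaNs and order
; -0.0 below +0.0, which matches the semantics of SVE's fmax, hence fmax
; rather than fmaxnm in these checks.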
-define void @fmax_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @fmax_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v32f16:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fmax z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = call <32 x half> @llvm.maximum.v32f16(<32 x half> %op1, <32 x half> %op2)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fmax_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @fmax_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%res = call <64 x half> @llvm.maximum.v64f16(<64 x half> %op1, <64 x half> %op2)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fmax_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @fmax_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%res = call <128 x half> @llvm.maximum.v128f16(<128 x half> %op1, <128 x half> %op2)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fmax_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @fmax_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%res = call <8 x float> @llvm.maximum.v8f32(<8 x float> %op1, <8 x float> %op2)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fmax_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @fmax_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v16f32:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = call <16 x float> @llvm.maximum.v16f32(<16 x float> %op1, <16 x float> %op2)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fmax_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @fmax_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%res = call <32 x float> @llvm.maximum.v32f32(<32 x float> %op1, <32 x float> %op2)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fmax_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @fmax_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%res = call <64 x float> @llvm.maximum.v64f32(<64 x float> %op1, <64 x float> %op2)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fmax_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fmax_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%res = call <4 x double> @llvm.maximum.v4f64(<4 x double> %op1, <4 x double> %op2)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fmax_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @fmax_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v8f64:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = call <8 x double> @llvm.maximum.v8f64(<8 x double> %op1, <8 x double> %op2)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fmax_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fmax_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%res = call <16 x double> @llvm.maximum.v16f64(<16 x double> %op1, <16 x double> %op2)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fmax_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fmax_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%res = call <32 x double> @llvm.maximum.v32f64(<32 x double> %op1, <32 x double> %op2)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
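; As with the fmax tests above, @llvm.minimum.* is NaN-propagating, so the
; expected lowering is the predicated FMIN rather than FMINNM.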
-define void @fmin_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @fmin_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%res = call <16 x half> @llvm.minimum.v16f16(<16 x half> %op1, <16 x half> %op2)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @fmin_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @fmin_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmin_v32f16:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = call <32 x half> @llvm.minimum.v32f16(<32 x half> %op1, <32 x half> %op2)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fmin_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @fmin_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmin_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%res = call <64 x half> @llvm.minimum.v64f16(<64 x half> %op1, <64 x half> %op2)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @fmin_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @fmin_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmin_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%res = call <128 x half> @llvm.minimum.v128f16(<128 x half> %op1, <128 x half> %op2)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @fmin_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @fmin_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%res = call <8 x float> @llvm.minimum.v8f32(<8 x float> %op1, <8 x float> %op2)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @fmin_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @fmin_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmin_v16f32:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = call <16 x float> @llvm.minimum.v16f32(<16 x float> %op1, <16 x float> %op2)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fmin_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @fmin_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmin_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%res = call <32 x float> @llvm.minimum.v32f32(<32 x float> %op1, <32 x float> %op2)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @fmin_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @fmin_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmin_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%res = call <64 x float> @llvm.minimum.v64f32(<64 x float> %op1, <64 x float> %op2)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @fmin_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @fmin_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%res = call <4 x double> @llvm.minimum.v4f64(<4 x double> %op1, <4 x double> %op2)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @fmin_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @fmin_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmin_v8f64:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = call <8 x double> @llvm.minimum.v8f64(<8 x double> %op1, <8 x double> %op2)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @fmin_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @fmin_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmin_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%res = call <16 x double> @llvm.minimum.v16f64(<16 x double> %op1, <16 x double> %op2)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @fmin_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @fmin_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmin_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%res = call <32 x double> @llvm.minimum.v32f64(<32 x double> %op1, <32 x double> %op2)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret half %res
}
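; @llvm.vector.reduce.fadd without a 'fast' flag is a strictly-ordered
; reduction, so it must lower to FADDA, which folds %start into the vector
; one element at a time in element order.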
-define half @fadda_v16f16(half %start, <16 x half>* %a) vscale_range(2,0) #0 {
+define half @fadda_v16f16(half %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fadda_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fadda h0, p0, h0, z1.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
ret half %res
}
-define half @fadda_v32f16(half %start, <32 x half>* %a) #0 {
+define half @fadda_v32f16(half %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: fadda_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fadda h0, p0, h0, z1.h
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call half @llvm.vector.reduce.fadd.v32f16(half %start, <32 x half> %op)
ret half %res
}
-define half @fadda_v64f16(half %start, <64 x half>* %a) vscale_range(8,0) #0 {
+define half @fadda_v64f16(half %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fadda_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fadda h0, p0, h0, z1.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call half @llvm.vector.reduce.fadd.v64f16(half %start, <64 x half> %op)
ret half %res
}
-define half @fadda_v128f16(half %start, <128 x half>* %a) vscale_range(16,0) #0 {
+define half @fadda_v128f16(half %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fadda_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fadda h0, p0, h0, z1.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call half @llvm.vector.reduce.fadd.v128f16(half %start, <128 x half> %op)
ret half %res
}
ret float %res
}
-define float @fadda_v8f32(float %start, <8 x float>* %a) vscale_range(2,0) #0 {
+define float @fadda_v8f32(float %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fadda_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fadda s0, p0, s0, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
ret float %res
}
-define float @fadda_v16f32(float %start, <16 x float>* %a) #0 {
+define float @fadda_v16f32(float %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: fadda_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fadda s0, p0, s0, z1.s
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call float @llvm.vector.reduce.fadd.v16f32(float %start, <16 x float> %op)
ret float %res
}
-define float @fadda_v32f32(float %start, <32 x float>* %a) vscale_range(8,0) #0 {
+define float @fadda_v32f32(float %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fadda_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fadda s0, p0, s0, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call float @llvm.vector.reduce.fadd.v32f32(float %start, <32 x float> %op)
ret float %res
}
-define float @fadda_v64f32(float %start, <64 x float>* %a) vscale_range(16,0) #0 {
+define float @fadda_v64f32(float %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fadda_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fadda s0, p0, s0, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call float @llvm.vector.reduce.fadd.v64f32(float %start, <64 x float> %op)
ret float %res
}
ret double %res
}
-define double @fadda_v4f64(double %start, <4 x double>* %a) vscale_range(2,0) #0 {
+define double @fadda_v4f64(double %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fadda_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fadda d0, p0, d0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
ret double %res
}
-define double @fadda_v8f64(double %start, <8 x double>* %a) #0 {
+define double @fadda_v8f64(double %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: fadda_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fadda d0, p0, d0, z1.d
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call double @llvm.vector.reduce.fadd.v8f64(double %start, <8 x double> %op)
ret double %res
}
-define double @fadda_v16f64(double %start, <16 x double>* %a) vscale_range(8,0) #0 {
+define double @fadda_v16f64(double %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fadda_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fadda d0, p0, d0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call double @llvm.vector.reduce.fadd.v16f64(double %start, <16 x double> %op)
ret double %res
}
-define double @fadda_v32f64(double %start, <32 x double>* %a) vscale_range(16,0) #0 {
+define double @fadda_v32f64(double %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fadda_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fadda d0, p0, d0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call double @llvm.vector.reduce.fadd.v32f64(double %start, <32 x double> %op)
ret double %res
}
ret half %res
}
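; With the 'fast' flag the reduction may be reassociated, so the expected
; lowering switches to the tree-based FADDV reduction followed by a scalar
; fadd of %start, as the checks below show.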
-define half @faddv_v16f16(half %start, <16 x half>* %a) vscale_range(2,0) #0 {
+define half @faddv_v16f16(half %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
ret half %res
}
-define half @faddv_v32f16(half %start, <32 x half>* %a) #0 {
+define half @faddv_v32f16(half %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: faddv_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: faddv h1, p0, z1.h
; VBITS_GE_512-NEXT: fadd h0, h0, h1
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call fast half @llvm.vector.reduce.fadd.v32f16(half %start, <32 x half> %op)
ret half %res
}
-define half @faddv_v64f16(half %start, <64 x half>* %a) vscale_range(8,0) #0 {
+define half @faddv_v64f16(half %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: faddv_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call fast half @llvm.vector.reduce.fadd.v64f16(half %start, <64 x half> %op)
ret half %res
}
-define half @faddv_v128f16(half %start, <128 x half>* %a) vscale_range(16,0) #0 {
+define half @faddv_v128f16(half %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: faddv_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call fast half @llvm.vector.reduce.fadd.v128f16(half %start, <128 x half> %op)
ret half %res
}
ret float %res
}
-define float @faddv_v8f32(float %start, <8 x float>* %a) vscale_range(2,0) #0 {
+define float @faddv_v8f32(float %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
ret float %res
}
-define float @faddv_v16f32(float %start, <16 x float>* %a) #0 {
+define float @faddv_v16f32(float %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: faddv_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: faddv s1, p0, z1.s
; VBITS_GE_512-NEXT: fadd s0, s0, s1
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call fast float @llvm.vector.reduce.fadd.v16f32(float %start, <16 x float> %op)
ret float %res
}
-define float @faddv_v32f32(float %start, <32 x float>* %a) vscale_range(8,0) #0 {
+define float @faddv_v32f32(float %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: faddv_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call fast float @llvm.vector.reduce.fadd.v32f32(float %start, <32 x float> %op)
ret float %res
}
-define float @faddv_v64f32(float %start, <64 x float>* %a) vscale_range(16,0) #0 {
+define float @faddv_v64f32(float %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: faddv_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call fast float @llvm.vector.reduce.fadd.v64f32(float %start, <64 x float> %op)
ret float %res
}
ret double %res
}
-define double @faddv_v4f64(double %start, <4 x double>* %a) vscale_range(2,0) #0 {
+define double @faddv_v4f64(double %start, ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: faddv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
ret double %res
}
-define double @faddv_v8f64(double %start, <8 x double>* %a) #0 {
+define double @faddv_v8f64(double %start, ptr %a) #0 {
; VBITS_GE_256-LABEL: faddv_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: faddv d1, p0, z1.d
; VBITS_GE_512-NEXT: fadd d0, d0, d1
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call fast double @llvm.vector.reduce.fadd.v8f64(double %start, <8 x double> %op)
ret double %res
}
-define double @faddv_v16f64(double %start, <16 x double>* %a) vscale_range(8,0) #0 {
+define double @faddv_v16f64(double %start, ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: faddv_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call fast double @llvm.vector.reduce.fadd.v16f64(double %start, <16 x double> %op)
ret double %res
}
-define double @faddv_v32f64(double %start, <32 x double>* %a) vscale_range(16,0) #0 {
+define double @faddv_v32f64(double %start, ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: faddv_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call fast double @llvm.vector.reduce.fadd.v32f64(double %start, <32 x double> %op)
ret double %res
}
ret half %res
}
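; @llvm.vector.reduce.fmax follows maxnum semantics (a quiet NaN operand is
; ignored when the other element is a number), hence the FMAXNMV reduction
; rather than FMAXV.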
-define half @fmaxv_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define half @fmaxv_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
ret half %res
}
-define half @fmaxv_v32f16(<32 x half>* %a) #0 {
+define half @fmaxv_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fmaxv_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fmaxnmv h0, p0, z0.h
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %op)
ret half %res
}
-define half @fmaxv_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define half @fmaxv_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxv_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmax.v64f16(<64 x half> %op)
ret half %res
}
-define half @fmaxv_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define half @fmaxv_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxv_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %op)
ret half %res
}
ret float %res
}
-define float @fmaxv_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define float @fmaxv_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
ret float %res
}
-define float @fmaxv_v16f32(<16 x float>* %a) #0 {
+define float @fmaxv_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: fmaxv_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fmaxnmv s0, p0, z0.s
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %op)
ret float %res
}
-define float @fmaxv_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define float @fmaxv_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxv_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> %op)
ret float %res
}
-define float @fmaxv_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define float @fmaxv_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxv_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmax.v64f32(<64 x float> %op)
ret float %res
}
ret double %res
}
-define double @fmaxv_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define double @fmaxv_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fmaxnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
ret double %res
}
-define double @fmaxv_v8f64(<8 x double>* %a) #0 {
+define double @fmaxv_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: fmaxv_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fmaxnmv d0, p0, z0.d
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %op)
ret double %res
}
-define double @fmaxv_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define double @fmaxv_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxv_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fmaxnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %op)
ret double %res
}
-define double @fmaxv_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define double @fmaxv_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxv_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fmaxnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %op)
ret double %res
}
ret half %res
}
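; Likewise @llvm.vector.reduce.fmin has minnum semantics and lowers to
; FMINNMV.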
-define half @fminv_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define half @fminv_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fminnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op)
ret half %res
}
-define half @fminv_v32f16(<32 x half>* %a) #0 {
+define half @fminv_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fminv_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fminnmv h0, p0, z0.h
; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %op)
ret half %res
}
-define half @fminv_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define half @fminv_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fminv_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fminnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmin.v64f16(<64 x half> %op)
ret half %res
}
-define half @fminv_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define half @fminv_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fminv_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fminnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %op)
ret half %res
}
ret float %res
}
-define float @fminv_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define float @fminv_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fminnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op)
ret float %res
}
-define float @fminv_v16f32(<16 x float>* %a) #0 {
+define float @fminv_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: fminv_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fminnmv s0, p0, z0.s
; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %op)
ret float %res
}
-define float @fminv_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define float @fminv_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fminv_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fminnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %op)
ret float %res
}
-define float @fminv_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define float @fminv_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fminv_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fminnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmin.v64f32(<64 x float> %op)
ret float %res
}
ret double %res
}
-define double @fminv_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define double @fminv_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fminv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fminnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op)
ret double %res
}
-define double @fminv_v8f64(<8 x double>* %a) #0 {
+define double @fminv_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: fminv_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fminnmv d0, p0, z0.d
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %op)
ret double %res
}
-define double @fminv_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define double @fminv_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: fminv_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fminnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %op)
ret double %res
}
-define double @fminv_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define double @fminv_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: fminv_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fminnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %op)
ret double %res
}
ret <8 x half> %res
}
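; @llvm.ceil rounds toward +infinity, which maps to the SVE FRINTP
; instruction.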
-define void @frintp_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define void @frintp_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: frintp z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @frintp_v32f16(<32 x half>* %a) #0 {
+define void @frintp_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: frintp z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @frintp_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @frintp_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: frintp z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call <64 x half> @llvm.ceil.v64f16(<64 x half> %op)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @frintp_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @frintp_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: frintp z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call <128 x half> @llvm.ceil.v128f16(<128 x half> %op)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @frintp_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @frintp_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: frintp z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @frintp_v16f32(<16 x float>* %a) #0 {
+define void @frintp_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: frintp z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @frintp_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @frintp_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: frintp z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call <32 x float> @llvm.ceil.v32f32(<32 x float> %op)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @frintp_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @frintp_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: frintp z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call <64 x float> @llvm.ceil.v64f32(<64 x float> %op)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @frintp_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @frintp_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frintp z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @frintp_v8f64(<8 x double>* %a) #0 {
+define void @frintp_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: frintp z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @frintp_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @frintp_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: frintp z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call <16 x double> @llvm.ceil.v16f64(<16 x double> %op)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @frintp_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @frintp_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: frintp z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call <32 x double> @llvm.ceil.v32f64(<32 x double> %op)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
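; @llvm.floor rounds toward -infinity, mapping to FRINTM.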
-define void @frintm_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define void @frintm_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: frintm z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @frintm_v32f16(<32 x half>* %a) #0 {
+define void @frintm_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: frintm z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @frintm_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @frintm_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: frintm z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call <64 x half> @llvm.floor.v64f16(<64 x half> %op)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @frintm_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @frintm_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: frintm z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call <128 x half> @llvm.floor.v128f16(<128 x half> %op)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @frintm_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @frintm_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: frintm z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @frintm_v16f32(<16 x float>* %a) #0 {
+define void @frintm_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: frintm z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @frintm_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @frintm_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: frintm z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call <32 x float> @llvm.floor.v32f32(<32 x float> %op)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @frintm_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @frintm_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: frintm z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call <64 x float> @llvm.floor.v64f32(<64 x float> %op)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @frintm_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @frintm_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frintm z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @frintm_v8f64(<8 x double>* %a) #0 {
+define void @frintm_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: frintm z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @frintm_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @frintm_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: frintm z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call <16 x double> @llvm.floor.v16f64(<16 x double> %op)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @frintm_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @frintm_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: frintm z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call <32 x double> @llvm.floor.v32f64(<32 x double> %op)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
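; @llvm.nearbyint rounds using the current FPCR rounding mode without
; raising the inexact exception, mapping to FRINTI.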
-define void @frinti_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define void @frinti_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: frinti z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @frinti_v32f16(<32 x half>* %a) #0 {
+define void @frinti_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: frinti z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @frinti_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @frinti_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: frinti z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call <64 x half> @llvm.nearbyint.v64f16(<64 x half> %op)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @frinti_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @frinti_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: frinti z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call <128 x half> @llvm.nearbyint.v128f16(<128 x half> %op)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @frinti_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @frinti_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: frinti z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @frinti_v16f32(<16 x float>* %a) #0 {
+define void @frinti_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: frinti z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @frinti_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @frinti_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: frinti z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call <32 x float> @llvm.nearbyint.v32f32(<32 x float> %op)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @frinti_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @frinti_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: frinti z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call <64 x float> @llvm.nearbyint.v64f32(<64 x float> %op)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @frinti_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @frinti_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frinti z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @frinti_v8f64(<8 x double>* %a) #0 {
+define void @frinti_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: frinti z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @frinti_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @frinti_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: frinti z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> %op)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @frinti_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @frinti_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: frinti z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call <32 x double> @llvm.nearbyint.v32f64(<32 x double> %op)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
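; @llvm.rint also rounds using the current rounding mode but may raise the
; inexact exception; that is what distinguishes FRINTX from FRINTI.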
-define void @frintx_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define void @frintx_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @frintx_v32f16(<32 x half>* %a) #0 {
+define void @frintx_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: frintx z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @frintx_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @frintx_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call <64 x half> @llvm.rint.v64f16(<64 x half> %op)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @frintx_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @frintx_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call <128 x half> @llvm.rint.v128f16(<128 x half> %op)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @frintx_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @frintx_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: frintx z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @frintx_v16f32(<16 x float>* %a) #0 {
+define void @frintx_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: frintx z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @frintx_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @frintx_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: frintx z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call <32 x float> @llvm.rint.v32f32(<32 x float> %op)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @frintx_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @frintx_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: frintx z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call <64 x float> @llvm.rint.v64f32(<64 x float> %op)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @frintx_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @frintx_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @frintx_v8f64(<8 x double>* %a) #0 {
+define void @frintx_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: frintx z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @frintx_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @frintx_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call <16 x double> @llvm.rint.v16f64(<16 x double> %op)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @frintx_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @frintx_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call <32 x double> @llvm.rint.v32f64(<32 x double> %op)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
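; @llvm.round.* (round to nearest, ties away from zero) maps to FRINTA
; and follows the same load/convert/store pattern, e.g.:
;   %r = call <16 x half> @llvm.round.v16f16(<16 x half> %v)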
-define void @frinta_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define void @frinta_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: frinta z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.round.v16f16(<16 x half> %op)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @frinta_v32f16(<32 x half>* %a) #0 {
+define void @frinta_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinta_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: frinta z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call <32 x half> @llvm.round.v32f16(<32 x half> %op)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @frinta_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @frinta_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinta_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: frinta z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call <64 x half> @llvm.round.v64f16(<64 x half> %op)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @frinta_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @frinta_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinta_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: frinta z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call <128 x half> @llvm.round.v128f16(<128 x half> %op)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @frinta_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @frinta_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: frinta z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.round.v8f32(<8 x float> %op)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @frinta_v16f32(<16 x float>* %a) #0 {
+define void @frinta_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinta_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: frinta z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call <16 x float> @llvm.round.v16f32(<16 x float> %op)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @frinta_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @frinta_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinta_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: frinta z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call <32 x float> @llvm.round.v32f32(<32 x float> %op)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @frinta_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @frinta_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinta_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: frinta z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call <64 x float> @llvm.round.v64f32(<64 x float> %op)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @frinta_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @frinta_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frinta z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.round.v4f64(<4 x double> %op)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @frinta_v8f64(<8 x double>* %a) #0 {
+define void @frinta_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinta_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: frinta z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call <8 x double> @llvm.round.v8f64(<8 x double> %op)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @frinta_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @frinta_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinta_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: frinta z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call <16 x double> @llvm.round.v16f64(<16 x double> %op)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @frinta_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @frinta_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinta_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: frinta z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call <32 x double> @llvm.round.v32f64(<32 x double> %op)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
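; @llvm.roundeven.* (round to nearest, ties to even) maps to FRINTN:
;   %r = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %v)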
-define void @frintn_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define void @frintn_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: frintn z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @frintn_v32f16(<32 x half>* %a) #0 {
+define void @frintn_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintn_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: frintn z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @frintn_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @frintn_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintn_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: frintn z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @frintn_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @frintn_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintn_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: frintn z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @frintn_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @frintn_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: frintn z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @frintn_v16f32(<16 x float>* %a) #0 {
+define void @frintn_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintn_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: frintn z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @frintn_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @frintn_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintn_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: frintn z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @frintn_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @frintn_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintn_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: frintn z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @frintn_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @frintn_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frintn z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @frintn_v8f64(<8 x double>* %a) #0 {
+define void @frintn_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintn_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: frintn z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @frintn_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @frintn_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintn_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: frintn z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @frintn_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @frintn_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintn_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: frintn z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %res
}
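; @llvm.trunc.* (round toward zero) maps to FRINTZ:
;   %r = call <16 x half> @llvm.trunc.v16f16(<16 x half> %v)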
-define void @frintz_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define void @frintz_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: frintz z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x half>, <16 x half>* %a
+ %op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op)
- store <16 x half> %res, <16 x half>* %a
+ store <16 x half> %res, ptr %a
ret void
}
-define void @frintz_v32f16(<32 x half>* %a) #0 {
+define void @frintz_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintz_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: frintz z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x half>, <32 x half>* %a
+ %op = load <32 x half>, ptr %a
%res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op)
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @frintz_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define void @frintz_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintz_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: frintz z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x half>, <64 x half>* %a
+ %op = load <64 x half>, ptr %a
%res = call <64 x half> @llvm.trunc.v64f16(<64 x half> %op)
- store <64 x half> %res, <64 x half>* %a
+ store <64 x half> %res, ptr %a
ret void
}
-define void @frintz_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define void @frintz_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintz_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: frintz z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x half>, <128 x half>* %a
+ %op = load <128 x half>, ptr %a
%res = call <128 x half> @llvm.trunc.v128f16(<128 x half> %op)
- store <128 x half> %res, <128 x half>* %a
+ store <128 x half> %res, ptr %a
ret void
}
ret <4 x float> %res
}
-define void @frintz_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define void @frintz_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: frintz z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x float>, <8 x float>* %a
+ %op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
- store <8 x float> %res, <8 x float>* %a
+ store <8 x float> %res, ptr %a
ret void
}
-define void @frintz_v16f32(<16 x float>* %a) #0 {
+define void @frintz_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintz_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: frintz z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x float>, <16 x float>* %a
+ %op = load <16 x float>, ptr %a
%res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op)
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @frintz_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define void @frintz_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintz_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: frintz z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x float>, <32 x float>* %a
+ %op = load <32 x float>, ptr %a
%res = call <32 x float> @llvm.trunc.v32f32(<32 x float> %op)
- store <32 x float> %res, <32 x float>* %a
+ store <32 x float> %res, ptr %a
ret void
}
-define void @frintz_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define void @frintz_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintz_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: frintz z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x float>, <64 x float>* %a
+ %op = load <64 x float>, ptr %a
%res = call <64 x float> @llvm.trunc.v64f32(<64 x float> %op)
- store <64 x float> %res, <64 x float>* %a
+ store <64 x float> %res, ptr %a
ret void
}
ret <2 x double> %res
}
-define void @frintz_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define void @frintz_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frintz z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x double>, <4 x double>* %a
+ %op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
- store <4 x double> %res, <4 x double>* %a
+ store <4 x double> %res, ptr %a
ret void
}
-define void @frintz_v8f64(<8 x double>* %a) #0 {
+define void @frintz_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintz_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: frintz z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x double>, <8 x double>* %a
+ %op = load <8 x double>, ptr %a
%res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op)
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
-define void @frintz_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define void @frintz_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintz_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: frintz z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x double>, <16 x double>* %a
+ %op = load <16 x double>, ptr %a
%res = call <16 x double> @llvm.trunc.v16f64(<16 x double> %op)
- store <16 x double> %res, <16 x double>* %a
+ store <16 x double> %res, ptr %a
ret void
}
-define void @frintz_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define void @frintz_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintz_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: frintz z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x double>, <32 x double>* %a
+ %op = load <32 x double>, ptr %a
%res = call <32 x double> @llvm.trunc.v32f64(<32 x double> %op)
- store <32 x double> %res, <32 x double>* %a
+ store <32 x double> %res, ptr %a
ret void
}
ret <8 x half> %sel
}
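; The select tests pick between two whole vectors on a scalar i1: the
; condition arrives in w2, is masked down to a single bit (and w8, w2,
; #0x1) and materialised as an all-true or all-false predicate, and SEL
; then chooses every lane from one operand or the other. The loads are
; volatile so neither operand can be folded into the select. Illustrative
; IR shape:
;   %sel = select i1 %mask, <16 x half> %op1, <16 x half> %op2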
-define void @select_v16f16(<16 x half>* %a, <16 x half>* %b, i1 %mask) vscale_range(2,0) #0 {
+define void @select_v16f16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <16 x half>, <16 x half>* %a
- %op2 = load volatile <16 x half>, <16 x half>* %b
+ %op1 = load volatile <16 x half>, ptr %a
+ %op2 = load volatile <16 x half>, ptr %b
%sel = select i1 %mask, <16 x half> %op1, <16 x half> %op2
- store <16 x half> %sel, <16 x half>* %a
+ store <16 x half> %sel, ptr %a
ret void
}
-define void @select_v32f16(<32 x half>* %a, <32 x half>* %b, i1 %mask) #0 {
+define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load volatile <32 x half>, <32 x half>* %a
- %op2 = load volatile <32 x half>, <32 x half>* %b
+ %op1 = load volatile <32 x half>, ptr %a
+ %op2 = load volatile <32 x half>, ptr %b
%sel = select i1 %mask, <32 x half> %op1, <32 x half> %op2
- store <32 x half> %sel, <32 x half>* %a
+ store <32 x half> %sel, ptr %a
ret void
}
-define void @select_v64f16(<64 x half>* %a, <64 x half>* %b, i1 %mask) vscale_range(8,0) #0 {
+define void @select_v64f16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <64 x half>, <64 x half>* %a
- %op2 = load volatile <64 x half>, <64 x half>* %b
+ %op1 = load volatile <64 x half>, ptr %a
+ %op2 = load volatile <64 x half>, ptr %b
%sel = select i1 %mask, <64 x half> %op1, <64 x half> %op2
- store <64 x half> %sel, <64 x half>* %a
+ store <64 x half> %sel, ptr %a
ret void
}
-define void @select_v128f16(<128 x half>* %a, <128 x half>* %b, i1 %mask) vscale_range(16,0) #0 {
+define void @select_v128f16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <128 x half>, <128 x half>* %a
- %op2 = load volatile <128 x half>, <128 x half>* %b
+ %op1 = load volatile <128 x half>, ptr %a
+ %op2 = load volatile <128 x half>, ptr %b
%sel = select i1 %mask, <128 x half> %op1, <128 x half> %op2
- store <128 x half> %sel, <128 x half>* %a
+ store <128 x half> %sel, ptr %a
ret void
}
ret <4 x float> %sel
}
-define void @select_v8f32(<8 x float>* %a, <8 x float>* %b, i1 %mask) vscale_range(2,0) #0 {
+define void @select_v8f32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <8 x float>, <8 x float>* %a
- %op2 = load volatile <8 x float>, <8 x float>* %b
+ %op1 = load volatile <8 x float>, ptr %a
+ %op2 = load volatile <8 x float>, ptr %b
%sel = select i1 %mask, <8 x float> %op1, <8 x float> %op2
- store <8 x float> %sel, <8 x float>* %a
+ store <8 x float> %sel, ptr %a
ret void
}
-define void @select_v16f32(<16 x float>* %a, <16 x float>* %b, i1 %mask) #0 {
+define void @select_v16f32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load volatile <16 x float>, <16 x float>* %a
- %op2 = load volatile <16 x float>, <16 x float>* %b
+ %op1 = load volatile <16 x float>, ptr %a
+ %op2 = load volatile <16 x float>, ptr %b
%sel = select i1 %mask, <16 x float> %op1, <16 x float> %op2
- store <16 x float> %sel, <16 x float>* %a
+ store <16 x float> %sel, ptr %a
ret void
}
-define void @select_v32f32(<32 x float>* %a, <32 x float>* %b, i1 %mask) vscale_range(8,0) #0 {
+define void @select_v32f32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <32 x float>, <32 x float>* %a
- %op2 = load volatile <32 x float>, <32 x float>* %b
+ %op1 = load volatile <32 x float>, ptr %a
+ %op2 = load volatile <32 x float>, ptr %b
%sel = select i1 %mask, <32 x float> %op1, <32 x float> %op2
- store <32 x float> %sel, <32 x float>* %a
+ store <32 x float> %sel, ptr %a
ret void
}
-define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, i1 %mask) vscale_range(16,0) #0 {
+define void @select_v64f32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <64 x float>, <64 x float>* %a
- %op2 = load volatile <64 x float>, <64 x float>* %b
+ %op1 = load volatile <64 x float>, ptr %a
+ %op2 = load volatile <64 x float>, ptr %b
%sel = select i1 %mask, <64 x float> %op1, <64 x float> %op2
- store <64 x float> %sel, <64 x float>* %a
+ store <64 x float> %sel, ptr %a
ret void
}
ret <2 x double> %sel
}
-define void @select_v4f64(<4 x double>* %a, <4 x double>* %b, i1 %mask) vscale_range(2,0) #0 {
+define void @select_v4f64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <4 x double>, <4 x double>* %a
- %op2 = load volatile <4 x double>, <4 x double>* %b
+ %op1 = load volatile <4 x double>, ptr %a
+ %op2 = load volatile <4 x double>, ptr %b
%sel = select i1 %mask, <4 x double> %op1, <4 x double> %op2
- store <4 x double> %sel, <4 x double>* %a
+ store <4 x double> %sel, ptr %a
ret void
}
-define void @select_v8f64(<8 x double>* %a, <8 x double>* %b, i1 %mask) #0 {
+define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load volatile <8 x double>, <8 x double>* %a
- %op2 = load volatile <8 x double>, <8 x double>* %b
+ %op1 = load volatile <8 x double>, ptr %a
+ %op2 = load volatile <8 x double>, ptr %b
%sel = select i1 %mask, <8 x double> %op1, <8 x double> %op2
- store <8 x double> %sel, <8 x double>* %a
+ store <8 x double> %sel, ptr %a
ret void
}
-define void @select_v16f64(<16 x double>* %a, <16 x double>* %b, i1 %mask) vscale_range(8,0) #0 {
+define void @select_v16f64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <16 x double>, <16 x double>* %a
- %op2 = load volatile <16 x double>, <16 x double>* %b
+ %op1 = load volatile <16 x double>, ptr %a
+ %op2 = load volatile <16 x double>, ptr %b
%sel = select i1 %mask, <16 x double> %op1, <16 x double> %op2
- store <16 x double> %sel, <16 x double>* %a
+ store <16 x double> %sel, ptr %a
ret void
}
-define void @select_v32f64(<32 x double>* %a, <32 x double>* %b, i1 %mask) vscale_range(16,0) #0 {
+define void @select_v32f64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <32 x double>, <32 x double>* %a
- %op2 = load volatile <32 x double>, <32 x double>* %b
+ %op1 = load volatile <32 x double>, ptr %a
+ %op2 = load volatile <32 x double>, ptr %b
%sel = select i1 %mask, <32 x double> %op1, <32 x double> %op2
- store <32 x double> %sel, <32 x double>* %a
+ store <32 x double> %sel, ptr %a
ret void
}
}
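; fptoui is lowered to FCVTZU (floating-point convert to unsigned
; integer, rounding toward zero). The tests cover same-width, widening
; and narrowing conversions for each element-type pairing.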
; Don't use SVE for 128-bit vectors; they already fit in a single NEON register.
-define void @fcvtzu_v8f16_v8i16(<8 x half>* %a, <8 x i16>* %b) vscale_range(2,0) #0 {
+define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtzu v0.8h, v0.8h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x half>, <8 x half>* %a
+ %op1 = load <8 x half>, ptr %a
%res = fptoui <8 x half> %op1 to <8 x i16>
- store <8 x i16> %res, <8 x i16>* %b
+ store <8 x i16> %res, ptr %b
ret void
}
-define void @fcvtzu_v16f16_v16i16(<16 x half>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%res = fptoui <16 x half> %op1 to <16 x i16>
- store <16 x i16> %res, <16 x i16>* %b
+ store <16 x i16> %res, ptr %b
ret void
}
-define void @fcvtzu_v32f16_v32i16(<32 x half>* %a, <32 x i16>* %b) #0 {
+define void @fcvtzu_v32f16_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v32f16_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fcvtzu z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%res = fptoui <32 x half> %op1 to <32 x i16>
- store <32 x i16> %res, <32 x i16>* %b
+ store <32 x i16> %res, ptr %b
ret void
}
-define void @fcvtzu_v64f16_v64i16(<64 x half>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @fcvtzu_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
+ %op1 = load <64 x half>, ptr %a
%res = fptoui <64 x half> %op1 to <64 x i16>
- store <64 x i16> %res, <64 x i16>* %b
+ store <64 x i16> %res, ptr %b
ret void
}
-define void @fcvtzu_v128f16_v128i16(<128 x half>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @fcvtzu_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v128f16_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
+ %op1 = load <128 x half>, ptr %a
%res = fptoui <128 x half> %op1 to <128 x i16>
- store <128 x i16> %res, <128 x i16>* %b
+ store <128 x i16> %res, ptr %b
ret void
}
ret <4 x i32> %res
}
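; Widening conversions (here f16 to i32) build the predicate for the
; wider result element and convert directly across element sizes,
; reading .h source lanes into .s result lanes: fcvtzu z0.s, p0/m, z0.h.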
-define void @fcvtzu_v8f16_v8i32(<8 x half>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x half>, <8 x half>* %a
+ %op1 = load <8 x half>, ptr %a
%res = fptoui <8 x half> %op1 to <8 x i32>
- store <8 x i32> %res, <8 x i32>* %b
+ store <8 x i32> %res, ptr %b
ret void
}
-define void @fcvtzu_v16f16_v16i32(<16 x half>* %a, <16 x i32>* %b) #0 {
+define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f16_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%res = fptoui <16 x half> %op1 to <16 x i32>
- store <16 x i32> %res, <16 x i32>* %b
+ store <16 x i32> %res, ptr %b
ret void
}
-define void @fcvtzu_v32f16_v32i32(<32 x half>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @fcvtzu_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%res = fptoui <32 x half> %op1 to <32 x i32>
- store <32 x i32> %res, <32 x i32>* %b
+ store <32 x i32> %res, ptr %b
ret void
}
-define void @fcvtzu_v64f16_v64i32(<64 x half>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @fcvtzu_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
+ %op1 = load <64 x half>, ptr %a
%res = fptoui <64 x half> %op1 to <64 x i32>
- store <64 x i32> %res, <64 x i32>* %b
+ store <64 x i32> %res, ptr %b
ret void
}
ret <2 x i64> %res
}
-define void @fcvtzu_v4f16_v4i64(<4 x half>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x half>, <4 x half>* %a
+ %op1 = load <4 x half>, ptr %a
%res = fptoui <4 x half> %op1 to <4 x i64>
- store <4 x i64> %res, <4 x i64>* %b
+ store <4 x i64> %res, ptr %b
ret void
}
-define void @fcvtzu_v8f16_v8i64(<8 x half>* %a, <8 x i64>* %b) #0 {
+define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f16_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.h
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x half>, <8 x half>* %a
+ %op1 = load <8 x half>, ptr %a
%res = fptoui <8 x half> %op1 to <8 x i64>
- store <8 x i64> %res, <8 x i64>* %b
+ store <8 x i64> %res, ptr %b
ret void
}
-define void @fcvtzu_v16f16_v16i64(<16 x half>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%res = fptoui <16 x half> %op1 to <16 x i64>
- store <16 x i64> %res, <16 x i64>* %b
+ store <16 x i64> %res, ptr %b
ret void
}
-define void @fcvtzu_v32f16_v32i64(<32 x half>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @fcvtzu_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%res = fptoui <32 x half> %op1 to <32 x i64>
- store <32 x i64> %res, <32 x i64>* %b
+ store <32 x i64> %res, ptr %b
ret void
}
ret <4 x i16> %res
}
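; Narrowing conversions (here f32 to i16) convert in the source element
; size and then pack the results with uzp1; when the narrowed result fits
; in 128 bits it is returned by value, with the "kill" comment marking q0
; (or d0) as the live NEON subregister of z0. Illustrative IR shape:
;   %res = fptoui <8 x float> %op1 to <8 x i16>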
-define <8 x i16> @fcvtzu_v8f32_v8i16(<8 x float>* %a) vscale_range(2,0) #0 {
+define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%res = fptoui <8 x float> %op1 to <8 x i16>
ret <8 x i16> %res
}
-define void @fcvtzu_v16f32_v16i16(<16 x float>* %a, <16 x i16>* %b) #0 {
+define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fcvtzu z0.s, p1/m, z0.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%res = fptoui <16 x float> %op1 to <16 x i16>
- store <16 x i16> %res, <16 x i16>* %b
+ store <16 x i16> %res, ptr %b
ret void
}
-define void @fcvtzu_v32f32_v32i16(<32 x float>* %a, <32 x i16>* %b) vscale_range(8,0) #0 {
+define void @fcvtzu_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcvtzu z0.s, p1/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%res = fptoui <32 x float> %op1 to <32 x i16>
- store <32 x i16> %res, <32 x i16>* %b
+ store <32 x i16> %res, ptr %b
ret void
}
-define void @fcvtzu_v64f32_v64i16(<64 x float>* %a, <64 x i16>* %b) vscale_range(16,0) #0 {
+define void @fcvtzu_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fcvtzu z0.s, p1/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
+ %op1 = load <64 x float>, ptr %a
%res = fptoui <64 x float> %op1 to <64 x i16>
- store <64 x i16> %res, <64 x i16>* %b
+ store <64 x i16> %res, ptr %b
ret void
}
ret <4 x i32> %res
}
-define void @fcvtzu_v8f32_v8i32(<8 x float>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%res = fptoui <8 x float> %op1 to <8 x i32>
- store <8 x i32> %res, <8 x i32>* %b
+ store <8 x i32> %res, ptr %b
ret void
}
-define void @fcvtzu_v16f32_v16i32(<16 x float>* %a, <16 x i32>* %b) #0 {
+define void @fcvtzu_v16f32_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%res = fptoui <16 x float> %op1 to <16 x i32>
- store <16 x i32> %res, <16 x i32>* %b
+ store <16 x i32> %res, ptr %b
ret void
}
-define void @fcvtzu_v32f32_v32i32(<32 x float>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @fcvtzu_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%res = fptoui <32 x float> %op1 to <32 x i32>
- store <32 x i32> %res, <32 x i32>* %b
+ store <32 x i32> %res, ptr %b
ret void
}
-define void @fcvtzu_v64f32_v64i32(<64 x float>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @fcvtzu_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
+ %op1 = load <64 x float>, ptr %a
%res = fptoui <64 x float> %op1 to <64 x i32>
- store <64 x i32> %res, <64 x i32>* %b
+ store <64 x i32> %res, ptr %b
ret void
}
ret <2 x i64> %res
}
-define void @fcvtzu_v4f32_v4i64(<4 x float>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x float>, <4 x float>* %a
+ %op1 = load <4 x float>, ptr %a
%res = fptoui <4 x float> %op1 to <4 x i64>
- store <4 x i64> %res, <4 x i64>* %b
+ store <4 x i64> %res, ptr %b
ret void
}
-define void @fcvtzu_v8f32_v8i64(<8 x float>* %a, <8 x i64>* %b) #0 {
+define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f32_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%res = fptoui <8 x float> %op1 to <8 x i64>
- store <8 x i64> %res, <8 x i64>* %b
+ store <8 x i64> %res, ptr %b
ret void
}
-define void @fcvtzu_v16f32_v16i64(<16 x float>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @fcvtzu_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f32_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%res = fptoui <16 x float> %op1 to <16 x i64>
- store <16 x i64> %res, <16 x i64>* %b
+ store <16 x i64> %res, ptr %b
ret void
}
-define void @fcvtzu_v32f32_v32i64(<32 x float>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @fcvtzu_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%res = fptoui <32 x float> %op1 to <32 x i64>
- store <32 x i64> %res, <32 x i64>* %b
+ store <32 x i64> %res, ptr %b
ret void
}
ret <2 x i16> %res
}
-define <4 x i16> @fcvtzu_v4f64_v4i16(<4 x double>* %a) vscale_range(2,0) #0 {
+define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%res = fptoui <4 x double> %op1 to <4 x i16>
ret <4 x i16> %res
}
-define <8 x i16> @fcvtzu_v8f64_v8i16(<8 x double>* %a) #0 {
+define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%res = fptoui <8 x double> %op1 to <8 x i16>
ret <8 x i16> %res
}
-define void @fcvtzu_v16f64_v16i16(<16 x double>* %a, <16 x i16>* %b) vscale_range(8,0) #0 {
+define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzu z0.d, p1/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%res = fptoui <16 x double> %op1 to <16 x i16>
- store <16 x i16> %res, <16 x i16>* %b
+ store <16 x i16> %res, ptr %b
ret void
}
-define void @fcvtzu_v32f64_v32i16(<32 x double>* %a, <32 x i16>* %b) vscale_range(16,0) #0 {
+define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzu z0.d, p1/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%res = fptoui <32 x double> %op1 to <32 x i16>
- store <32 x i16> %res, <32 x i16>* %b
+ store <32 x i16> %res, ptr %b
ret void
}
ret <2 x i32> %res
}
-define <4 x i32> @fcvtzu_v4f64_v4i32(<4 x double>* %a) vscale_range(2,0) #0 {
+define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%res = fptoui <4 x double> %op1 to <4 x i32>
ret <4 x i32> %res
}
-define void @fcvtzu_v8f64_v8i32(<8 x double>* %a, <8 x i32>* %b) #0 {
+define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fcvtzu z0.d, p1/m, z0.d
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%res = fptoui <8 x double> %op1 to <8 x i32>
- store <8 x i32> %res, <8 x i32>* %b
+ store <8 x i32> %res, ptr %b
ret void
}
-define void @fcvtzu_v16f64_v16i32(<16 x double>* %a, <16 x i32>* %b) vscale_range(8,0) #0 {
+define void @fcvtzu_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzu z0.d, p1/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%res = fptoui <16 x double> %op1 to <16 x i32>
- store <16 x i32> %res, <16 x i32>* %b
+ store <16 x i32> %res, ptr %b
ret void
}
-define void @fcvtzu_v32f64_v32i32(<32 x double>* %a, <32 x i32>* %b) vscale_range(16,0) #0 {
+define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzu z0.d, p1/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%res = fptoui <32 x double> %op1 to <32 x i32>
- store <32 x i32> %res, <32 x i32>* %b
+ store <32 x i32> %res, ptr %b
ret void
}
ret <2 x i64> %res
}
-define void @fcvtzu_v4f64_v4i64(<4 x double>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%res = fptoui <4 x double> %op1 to <4 x i64>
- store <4 x i64> %res, <4 x i64>* %b
+ store <4 x i64> %res, ptr %b
ret void
}
-define void @fcvtzu_v8f64_v8i64(<8 x double>* %a, <8 x i64>* %b) #0 {
+define void @fcvtzu_v8f64_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%res = fptoui <8 x double> %op1 to <8 x i64>
- store <8 x i64> %res, <8 x i64>* %b
+ store <8 x i64> %res, ptr %b
ret void
}
-define void @fcvtzu_v16f64_v16i64(<16 x double>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @fcvtzu_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%res = fptoui <16 x double> %op1 to <16 x i64>
- store <16 x i64> %res, <16 x i64>* %b
+ store <16 x i64> %res, ptr %b
ret void
}
-define void @fcvtzu_v32f64_v32i64(<32 x double>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @fcvtzu_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%res = fptoui <32 x double> %op1 to <32 x i64>
- store <32 x i64> %res, <32 x i64>* %b
+ store <32 x i64> %res, ptr %b
ret void
}
}
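; fptosi is lowered to FCVTZS, the signed counterpart of FCVTZU above,
; with the same NEON-for-128-bit and predicated-SVE-for-wider structure.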
; Don't use SVE for 128-bit vectors; they already fit in a single NEON register.
-define void @fcvtzs_v8f16_v8i16(<8 x half>* %a, <8 x i16>* %b) vscale_range(2,0) #0 {
+define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v8f16_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtzs v0.8h, v0.8h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x half>, <8 x half>* %a
+ %op1 = load <8 x half>, ptr %a
%res = fptosi <8 x half> %op1 to <8 x i16>
- store <8 x i16> %res, <8 x i16>* %b
+ store <8 x i16> %res, ptr %b
ret void
}
-define void @fcvtzs_v16f16_v16i16(<16 x half>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v16f16_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%res = fptosi <16 x half> %op1 to <16 x i16>
- store <16 x i16> %res, <16 x i16>* %b
+ store <16 x i16> %res, ptr %b
ret void
}
-define void @fcvtzs_v32f16_v32i16(<32 x half>* %a, <32 x i16>* %b) #0 {
+define void @fcvtzs_v32f16_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v32f16_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fcvtzs z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%res = fptosi <32 x half> %op1 to <32 x i16>
- store <32 x i16> %res, <32 x i16>* %b
+ store <32 x i16> %res, ptr %b
ret void
}
-define void @fcvtzs_v64f16_v64i16(<64 x half>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @fcvtzs_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v64f16_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
+ %op1 = load <64 x half>, ptr %a
%res = fptosi <64 x half> %op1 to <64 x i16>
- store <64 x i16> %res, <64 x i16>* %b
+ store <64 x i16> %res, ptr %b
ret void
}
-define void @fcvtzs_v128f16_v128i16(<128 x half>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @fcvtzs_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v128f16_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
+ %op1 = load <128 x half>, ptr %a
%res = fptosi <128 x half> %op1 to <128 x i16>
- store <128 x i16> %res, <128 x i16>* %b
+ store <128 x i16> %res, ptr %b
ret void
}
ret <4 x i32> %res
}
-define void @fcvtzs_v8f16_v8i32(<8 x half>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v8f16_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x half>, <8 x half>* %a
+ %op1 = load <8 x half>, ptr %a
%res = fptosi <8 x half> %op1 to <8 x i32>
- store <8 x i32> %res, <8 x i32>* %b
+ store <8 x i32> %res, ptr %b
ret void
}
-define void @fcvtzs_v16f16_v16i32(<16 x half>* %a, <16 x i32>* %b) #0 {
+define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v16f16_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%res = fptosi <16 x half> %op1 to <16 x i32>
- store <16 x i32> %res, <16 x i32>* %b
+ store <16 x i32> %res, ptr %b
ret void
}
-define void @fcvtzs_v32f16_v32i32(<32 x half>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @fcvtzs_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v32f16_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%res = fptosi <32 x half> %op1 to <32 x i32>
- store <32 x i32> %res, <32 x i32>* %b
+ store <32 x i32> %res, ptr %b
ret void
}
-define void @fcvtzs_v64f16_v64i32(<64 x half>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @fcvtzs_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v64f16_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
+ %op1 = load <64 x half>, ptr %a
%res = fptosi <64 x half> %op1 to <64 x i32>
- store <64 x i32> %res, <64 x i32>* %b
+ store <64 x i32> %res, ptr %b
ret void
}
ret <2 x i64> %res
}
-define void @fcvtzs_v4f16_v4i64(<4 x half>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x half>, <4 x half>* %a
+ %op1 = load <4 x half>, ptr %a
%res = fptosi <4 x half> %op1 to <4 x i64>
- store <4 x i64> %res, <4 x i64>* %b
+ store <4 x i64> %res, ptr %b
ret void
}
-define void @fcvtzs_v8f16_v8i64(<8 x half>* %a, <8 x i64>* %b) #0 {
+define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v8f16_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.h
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x half>, <8 x half>* %a
+ %op1 = load <8 x half>, ptr %a
%res = fptosi <8 x half> %op1 to <8 x i64>
- store <8 x i64> %res, <8 x i64>* %b
+ store <8 x i64> %res, ptr %b
ret void
}
-define void @fcvtzs_v16f16_v16i64(<16 x half>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v16f16_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%res = fptosi <16 x half> %op1 to <16 x i64>
- store <16 x i64> %res, <16 x i64>* %b
+ store <16 x i64> %res, ptr %b
ret void
}
-define void @fcvtzs_v32f16_v32i64(<32 x half>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @fcvtzs_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v32f16_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%res = fptosi <32 x half> %op1 to <32 x i64>
- store <32 x i64> %res, <32 x i64>* %b
+ store <32 x i64> %res, ptr %b
ret void
}
ret <4 x i16> %res
}
-define <8 x i16> @fcvtzs_v8f32_v8i16(<8 x float>* %a) vscale_range(2,0) #0 {
+define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v8f32_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%res = fptosi <8 x float> %op1 to <8 x i16>
ret <8 x i16> %res
}
-define void @fcvtzs_v16f32_v16i16(<16 x float>* %a, <16 x i16>* %b) #0 {
+define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fcvtzs z0.s, p1/m, z0.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%res = fptosi <16 x float> %op1 to <16 x i16>
- store <16 x i16> %res, <16 x i16>* %b
+ store <16 x i16> %res, ptr %b
ret void
}
-define void @fcvtzs_v32f32_v32i16(<32 x float>* %a, <32 x i16>* %b) vscale_range(8,0) #0 {
+define void @fcvtzs_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v32f32_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcvtzs z0.s, p1/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%res = fptosi <32 x float> %op1 to <32 x i16>
- store <32 x i16> %res, <32 x i16>* %b
+ store <32 x i16> %res, ptr %b
ret void
}
-define void @fcvtzs_v64f32_v64i16(<64 x float>* %a, <64 x i16>* %b) vscale_range(16,0) #0 {
+define void @fcvtzs_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v64f32_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fcvtzs z0.s, p1/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
+ %op1 = load <64 x float>, ptr %a
%res = fptosi <64 x float> %op1 to <64 x i16>
- store <64 x i16> %res, <64 x i16>* %b
+ store <64 x i16> %res, ptr %b
ret void
}
ret <4 x i32> %res
}
-define void @fcvtzs_v8f32_v8i32(<8 x float>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v8f32_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%res = fptosi <8 x float> %op1 to <8 x i32>
- store <8 x i32> %res, <8 x i32>* %b
+ store <8 x i32> %res, ptr %b
ret void
}
-define void @fcvtzs_v16f32_v16i32(<16 x float>* %a, <16 x i32>* %b) #0 {
+define void @fcvtzs_v16f32_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%res = fptosi <16 x float> %op1 to <16 x i32>
- store <16 x i32> %res, <16 x i32>* %b
+ store <16 x i32> %res, ptr %b
ret void
}
-define void @fcvtzs_v32f32_v32i32(<32 x float>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @fcvtzs_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v32f32_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%res = fptosi <32 x float> %op1 to <32 x i32>
- store <32 x i32> %res, <32 x i32>* %b
+ store <32 x i32> %res, ptr %b
ret void
}
-define void @fcvtzs_v64f32_v64i32(<64 x float>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @fcvtzs_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v64f32_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
+ %op1 = load <64 x float>, ptr %a
%res = fptosi <64 x float> %op1 to <64 x i32>
- store <64 x i32> %res, <64 x i32>* %b
+ store <64 x i32> %res, ptr %b
ret void
}
ret <2 x i64> %res
}
-define void @fcvtzs_v4f32_v4i64(<4 x float>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v4f32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x float>, <4 x float>* %a
+ %op1 = load <4 x float>, ptr %a
%res = fptosi <4 x float> %op1 to <4 x i64>
- store <4 x i64> %res, <4 x i64>* %b
+ store <4 x i64> %res, ptr %b
ret void
}
-define void @fcvtzs_v8f32_v8i64(<8 x float>* %a, <8 x i64>* %b) #0 {
+define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v8f32_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%res = fptosi <8 x float> %op1 to <8 x i64>
- store <8 x i64> %res, <8 x i64>* %b
+ store <8 x i64> %res, ptr %b
ret void
}
-define void @fcvtzs_v16f32_v16i64(<16 x float>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @fcvtzs_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v16f32_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%res = fptosi <16 x float> %op1 to <16 x i64>
- store <16 x i64> %res, <16 x i64>* %b
+ store <16 x i64> %res, ptr %b
ret void
}
-define void @fcvtzs_v32f32_v32i64(<32 x float>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @fcvtzs_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v32f32_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%res = fptosi <32 x float> %op1 to <32 x i64>
- store <32 x i64> %res, <32 x i64>* %b
+ store <32 x i64> %res, ptr %b
ret void
}
ret <2 x i16> %res
}
-define <4 x i16> @fcvtzs_v4f64_v4i16(<4 x double>* %a) vscale_range(2,0) #0 {
+define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v4f64_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%res = fptosi <4 x double> %op1 to <4 x i16>
ret <4 x i16> %res
}
-define <8 x i16> @fcvtzs_v8f64_v8i16(<8 x double>* %a) #0 {
+define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%res = fptosi <8 x double> %op1 to <8 x i16>
ret <8 x i16> %res
}
-define void @fcvtzs_v16f64_v16i16(<16 x double>* %a, <16 x i16>* %b) vscale_range(8,0) #0 {
+define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v16f64_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%res = fptosi <16 x double> %op1 to <16 x i16>
- store <16 x i16> %res, <16 x i16>* %b
+ store <16 x i16> %res, ptr %b
ret void
}
-define void @fcvtzs_v32f64_v32i16(<32 x double>* %a, <32 x i16>* %b) vscale_range(16,0) #0 {
+define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v32f64_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%res = fptosi <32 x double> %op1 to <32 x i16>
- store <32 x i16> %res, <32 x i16>* %b
+ store <32 x i16> %res, ptr %b
ret void
}
ret <2 x i32> %res
}
-define <4 x i32> @fcvtzs_v4f64_v4i32(<4 x double>* %a) vscale_range(2,0) #0 {
+define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v4f64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%res = fptosi <4 x double> %op1 to <4 x i32>
ret <4 x i32> %res
}
-define void @fcvtzs_v8f64_v8i32(<8 x double>* %a, <8 x i32>* %b) #0 {
+define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fcvtzs z0.d, p1/m, z0.d
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%res = fptosi <8 x double> %op1 to <8 x i32>
- store <8 x i32> %res, <8 x i32>* %b
+ store <8 x i32> %res, ptr %b
ret void
}
-define void @fcvtzs_v16f64_v16i32(<16 x double>* %a, <16 x i32>* %b) vscale_range(8,0) #0 {
+define void @fcvtzs_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v16f64_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%res = fptosi <16 x double> %op1 to <16 x i32>
- store <16 x i32> %res, <16 x i32>* %b
+ store <16 x i32> %res, ptr %b
ret void
}
-define void @fcvtzs_v32f64_v32i32(<32 x double>* %a, <32 x i32>* %b) vscale_range(16,0) #0 {
+define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v32f64_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%res = fptosi <32 x double> %op1 to <32 x i32>
- store <32 x i32> %res, <32 x i32>* %b
+ store <32 x i32> %res, ptr %b
ret void
}
ret <2 x i64> %res
}
-define void @fcvtzs_v4f64_v4i64(<4 x double>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v4f64_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%res = fptosi <4 x double> %op1 to <4 x i64>
- store <4 x i64> %res, <4 x i64>* %b
+ store <4 x i64> %res, ptr %b
ret void
}
-define void @fcvtzs_v8f64_v8i64(<8 x double>* %a, <8 x i64>* %b) #0 {
+define void @fcvtzs_v8f64_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%res = fptosi <8 x double> %op1 to <8 x i64>
- store <8 x i64> %res, <8 x i64>* %b
+ store <8 x i64> %res, ptr %b
ret void
}
-define void @fcvtzs_v16f64_v16i64(<16 x double>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @fcvtzs_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v16f64_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%res = fptosi <16 x double> %op1 to <16 x i64>
- store <16 x i64> %res, <16 x i64>* %b
+ store <16 x i64> %res, ptr %b
ret void
}
-define void @fcvtzs_v32f64_v32i64(<32 x double>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @fcvtzs_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v32f64_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%res = fptosi <32 x double> %op1 to <32 x i64>
- store <32 x i64> %res, <32 x i64>* %b
+ store <32 x i64> %res, ptr %b
ret void
}
ret <8 x half> %sel
}
-define void @select_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @select_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%mask = fcmp oeq <16 x half> %op1, %op2
%sel = select <16 x i1> %mask, <16 x half> %op1, <16 x half> %op2
- store <16 x half> %sel, <16 x half>* %a
+ store <16 x half> %sel, ptr %a
ret void
}
-define void @select_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @select_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%mask = fcmp oeq <32 x half> %op1, %op2
%sel = select <32 x i1> %mask, <32 x half> %op1, <32 x half> %op2
- store <32 x half> %sel, <32 x half>* %a
+ store <32 x half> %sel, ptr %a
ret void
}
-define void @select_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @select_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%mask = fcmp oeq <64 x half> %op1, %op2
%sel = select <64 x i1> %mask, <64 x half> %op1, <64 x half> %op2
- store <64 x half> %sel, <64 x half>* %a
+ store <64 x half> %sel, ptr %a
ret void
}
-define void @select_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @select_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%mask = fcmp oeq <128 x half> %op1, %op2
%sel = select <128 x i1> %mask, <128 x half> %op1, <128 x half> %op2
- store <128 x half> %sel, <128 x half>* %a
+ store <128 x half> %sel, ptr %a
ret void
}
ret <4 x float> %sel
}
-define void @select_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @select_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%mask = fcmp oeq <8 x float> %op1, %op2
%sel = select <8 x i1> %mask, <8 x float> %op1, <8 x float> %op2
- store <8 x float> %sel, <8 x float>* %a
+ store <8 x float> %sel, ptr %a
ret void
}
-define void @select_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @select_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%mask = fcmp oeq <16 x float> %op1, %op2
%sel = select <16 x i1> %mask, <16 x float> %op1, <16 x float> %op2
- store <16 x float> %sel, <16 x float>* %a
+ store <16 x float> %sel, ptr %a
ret void
}
-define void @select_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @select_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%mask = fcmp oeq <32 x float> %op1, %op2
%sel = select <32 x i1> %mask, <32 x float> %op1, <32 x float> %op2
- store <32 x float> %sel, <32 x float>* %a
+ store <32 x float> %sel, ptr %a
ret void
}
-define void @select_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @select_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%mask = fcmp oeq <64 x float> %op1, %op2
%sel = select <64 x i1> %mask, <64 x float> %op1, <64 x float> %op2
- store <64 x float> %sel, <64 x float>* %a
+ store <64 x float> %sel, ptr %a
ret void
}
ret <2 x double> %sel
}
-define void @select_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @select_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%mask = fcmp oeq <4 x double> %op1, %op2
%sel = select <4 x i1> %mask, <4 x double> %op1, <4 x double> %op2
- store <4 x double> %sel, <4 x double>* %a
+ store <4 x double> %sel, ptr %a
ret void
}
-define void @select_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @select_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%mask = fcmp oeq <8 x double> %op1, %op2
%sel = select <8 x i1> %mask, <8 x double> %op1, <8 x double> %op2
- store <8 x double> %sel, <8 x double>* %a
+ store <8 x double> %sel, ptr %a
ret void
}
-define void @select_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @select_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%mask = fcmp oeq <16 x double> %op1, %op2
%sel = select <16 x i1> %mask, <16 x double> %op1, <16 x double> %op2
- store <16 x double> %sel, <16 x double>* %a
+ store <16 x double> %sel, ptr %a
ret void
}
-define void @select_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @select_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%mask = fcmp oeq <32 x double> %op1, %op2
%sel = select <32 x i1> %mask, <32 x double> %op1, <32 x double> %op2
- store <32 x double> %sel, <32 x double>* %a
+ store <32 x double> %sel, ptr %a
ret void
}
; too many extra vregs during frame lowering, when we don't have an emergency
; spill slot.
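; (Context: under AAPCS64 only the first eight pointer arguments are passed in
; registers x0-x7; the remaining 40 pointers and the trailing i64 are passed on
; the stack, and forwarding them to func2 is what creates the frame-lowering
; pressure described above.)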
-define dso_local void @func1(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
+define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v6, ptr %v7, ptr %v8,
; CHECK-LABEL: func1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x25, [sp, #-64]! // 8-byte Folded Spill
; CHECK-NEXT: str x8, [sp, #352]
; CHECK-NEXT: ldr x25, [sp], #64 // 8-byte Folded Reload
; CHECK-NEXT: b func2
- i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* %v13, i64* %v14, i64* %v15, i64* %v16,
- i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
- i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* %v31, i64* %v32,
- i64* %v33, i64* %v34, i64* %v35, i64* %v36, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
- i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* %v46, i64* %v47, i64* %v48,
+ ptr %v9, ptr %v10, ptr %v11, ptr %v12, ptr %v13, ptr %v14, ptr %v15, ptr %v16,
+ ptr %v17, ptr %v18, ptr %v19, ptr %v20, ptr %v21, ptr %v22, ptr %v23, ptr %v24,
+ ptr %v25, ptr %v26, ptr %v27, ptr %v28, ptr %v29, ptr %v30, ptr %v31, ptr %v32,
+ ptr %v33, ptr %v34, ptr %v35, ptr %v36, ptr %v37, ptr %v38, ptr %v39, ptr %v40,
+ ptr %v41, ptr %v42, ptr %v43, ptr %v44, ptr %v45, ptr %v46, ptr %v47, ptr %v48,
i64 %v49) #0 {
- tail call void @func2(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
- i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* undef, i64* %v14, i64* %v15, i64* %v16,
- i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
- i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* undef, i64* undef,
- i64* undef, i64* undef, i64* undef, i64* undef, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
- i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* undef, i64* %v47, i64* %v48,
+ tail call void @func2(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v6, ptr %v7, ptr %v8,
+ ptr %v9, ptr %v10, ptr %v11, ptr %v12, ptr undef, ptr %v14, ptr %v15, ptr %v16,
+ ptr %v17, ptr %v18, ptr %v19, ptr %v20, ptr %v21, ptr %v22, ptr %v23, ptr %v24,
+ ptr %v25, ptr %v26, ptr %v27, ptr %v28, ptr %v29, ptr %v30, ptr undef, ptr undef,
+ ptr undef, ptr undef, ptr undef, ptr undef, ptr %v37, ptr %v38, ptr %v39, ptr %v40,
+ ptr %v41, ptr %v42, ptr %v43, ptr %v44, ptr %v45, ptr undef, ptr %v47, ptr %v48,
i64 undef)
ret void
}
-declare dso_local void @func2(i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
- i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
- i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
- i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
- i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
- i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+declare dso_local void @func2(ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr,
+ ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr,
+ ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr,
+ ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr,
+ ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr,
+ ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr,
i64)
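; Note: vscale_range(2,2) pins vscale to exactly 2, i.e. a 256-bit SVE register
; size, in addition to enabling SVE via target-features.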
attributes #0 = { "target-features"="+sve" vscale_range(2,2) }
; Ensure that only frame indexes with no offset are folded into SVE loads/stores
; when accessing fixed-width objects.
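; (SVE contiguous ld1/st1 with scalar+immediate addressing scales the immediate
; by the vector length, so a frame index plus a fixed byte offset cannot in
; general be encoded; only the zero-offset form is safe to fold.)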
-define void @foo(<8 x i64>* %a) #0 {
+define void @foo(ptr %a) #0 {
; CHECK-LABEL: foo:
; CHECK: SelectionDAG has 14 nodes:
; CHECK-NEXT: t0: ch,glue = EntryToken
entry:
%r0 = alloca <8 x i64>
%r1 = alloca <8 x i64>
- %r = load volatile <8 x i64>, <8 x i64>* %a
- store volatile <8 x i64> %r, <8 x i64>* %r0
- store volatile <8 x i64> %r, <8 x i64>* %r1
+ %r = load volatile <8 x i64>, ptr %a
+ store volatile <8 x i64> %r, ptr %r0
+ store volatile <8 x i64> %r, ptr %r1
ret void
}
ret <8 x half> %r
}
-define <16 x half> @insertelement_v16f16(<16 x half>* %a) vscale_range(2,0) #0 {
+define <16 x half> @insertelement_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: insertelement_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #15
; CHECK-NEXT: mov z0.h, p1/m, h2
; CHECK-NEXT: st1h { z0.h }, p0, [x8]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
+ %op1 = load <16 x half>, ptr %a
%r = insertelement <16 x half> %op1, half 5.0, i64 15
ret <16 x half> %r
}
-define <32 x half> @insertelement_v32f16(<32 x half>* %a) #0 {
+define <32 x half> @insertelement_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: insertelement_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #16
; VBITS_GE_512-NEXT: mov z0.h, p1/m, h2
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
+ %op1 = load <32 x half>, ptr %a
%r = insertelement <32 x half> %op1, half 5.0, i64 31
ret <32 x half> %r
}
-define <64 x half> @insertelement_v64f16(<64 x half>* %a) vscale_range(8,0) #0 {
+define <64 x half> @insertelement_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: insertelement_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #63
; CHECK-NEXT: mov z0.h, p1/m, h2
; CHECK-NEXT: st1h { z0.h }, p0, [x8]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
+ %op1 = load <64 x half>, ptr %a
%r = insertelement <64 x half> %op1, half 5.0, i64 63
ret <64 x half> %r
}
-define <128 x half> @insertelement_v128f16(<128 x half>* %a) vscale_range(16,0) #0 {
+define <128 x half> @insertelement_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: insertelement_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #127
; CHECK-NEXT: mov z0.h, p1/m, h2
; CHECK-NEXT: st1h { z0.h }, p0, [x8]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
+ %op1 = load <128 x half>, ptr %a
%r = insertelement <128 x half> %op1, half 5.0, i64 127
ret <128 x half> %r
}
ret <4 x float> %r
}
-define <8 x float> @insertelement_v8f32(<8 x float>* %a) vscale_range(2,0) #0 {
+define <8 x float> @insertelement_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: insertelement_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #7
; CHECK-NEXT: mov z0.s, p1/m, s2
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
+ %op1 = load <8 x float>, ptr %a
%r = insertelement <8 x float> %op1, float 5.0, i64 7
ret <8 x float> %r
}
-define <16 x float> @insertelement_v16f32(<16 x float>* %a) #0 {
+define <16 x float> @insertelement_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: insertelement_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
; VBITS_GE_512-NEXT: mov z0.s, p1/m, s2
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
+ %op1 = load <16 x float>, ptr %a
%r = insertelement <16 x float> %op1, float 5.0, i64 15
ret <16 x float> %r
}
-define <32 x float> @insertelement_v32f32(<32 x float>* %a) vscale_range(8,0) #0 {
+define <32 x float> @insertelement_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: insertelement_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #31
; CHECK-NEXT: mov z0.s, p1/m, s2
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
+ %op1 = load <32 x float>, ptr %a
%r = insertelement <32 x float> %op1, float 5.0, i64 31
ret <32 x float> %r
}
-define <64 x float> @insertelement_v64f32(<64 x float>* %a) vscale_range(16,0) #0 {
+define <64 x float> @insertelement_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: insertelement_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #63
; CHECK-NEXT: mov z0.s, p1/m, s2
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
+ %op1 = load <64 x float>, ptr %a
%r = insertelement <64 x float> %op1, float 5.0, i64 63
ret <64 x float> %r
}
ret <2 x double> %r
}
-define <4 x double> @insertelement_v4f64(<4 x double>* %a) vscale_range(2,0) #0 {
+define <4 x double> @insertelement_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: insertelement_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #3
; CHECK-NEXT: mov z0.d, p1/m, d2
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
+ %op1 = load <4 x double>, ptr %a
%r = insertelement <4 x double> %op1, double 5.0, i64 3
ret <4 x double> %r
}
-define <8 x double> @insertelement_v8f64(<8 x double>* %a) #0 {
+define <8 x double> @insertelement_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: insertelement_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: mov z0.d, p1/m, d2
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
+ %op1 = load <8 x double>, ptr %a
%r = insertelement <8 x double> %op1, double 5.0, i64 7
ret <8 x double> %r
}
-define <16 x double> @insertelement_v16f64(<16 x double>* %a) vscale_range(8,0) #0 {
+define <16 x double> @insertelement_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: insertelement_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #15
; CHECK-NEXT: mov z0.d, p1/m, d2
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
+ %op1 = load <16 x double>, ptr %a
%r = insertelement <16 x double> %op1, double 5.0, i64 15
ret <16 x double> %r
}
-define <32 x double> @insertelement_v32f64(<32 x double>* %a) vscale_range(16,0) #0 {
+define <32 x double> @insertelement_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: insertelement_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #31
; CHECK-NEXT: mov z0.d, p1/m, d2
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
+ %op1 = load <32 x double>, ptr %a
%r = insertelement <32 x double> %op1, double 5.0, i64 31
ret <32 x double> %r
}
ret <16 x i8> %res
}
-define void @add_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @add_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: add_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = add <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @add_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @add_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: add_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: add z0.b, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = add <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @add_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @add_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: add_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = add <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @add_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @add_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: add_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = add <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @add_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @add_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: add_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = add <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @add_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @add_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: add_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: add z0.h, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = add <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @add_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @add_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: add_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = add <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @add_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @add_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: add_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = add <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @add_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @add_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: add_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = add <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @add_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @add_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: add_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: add z0.s, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = add <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @add_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @add_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: add_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = add <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @add_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @add_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: add_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = add <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @add_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @add_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: add_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = add <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @add_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @add_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: add_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: add z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = add <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @add_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @add_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: add_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = add <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @add_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(8,0) #0 {
+define void @add_v32i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: add_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #16
; CHECK-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; CHECK-NEXT: st1d { z1.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = add <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @mul_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @mul_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: mul_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = mul <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @mul_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @mul_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: mul_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: mul z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = mul <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @mul_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @mul_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: mul_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = mul <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @mul_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @mul_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: mul_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = mul <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @mul_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @mul_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: mul_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = mul <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @mul_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @mul_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: mul_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: mul z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = mul <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @mul_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @mul_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: mul_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = mul <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @mul_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @mul_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: mul_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = mul <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @mul_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @mul_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: mul_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = mul <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @mul_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @mul_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: mul_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: mul z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = mul <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @mul_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @mul_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: mul_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = mul <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @mul_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @mul_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: mul_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = mul <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @mul_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @mul_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: mul_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = mul <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @mul_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @mul_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: mul_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: mul z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = mul <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @mul_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @mul_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: mul_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = mul <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @mul_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @mul_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: mul_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = mul <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @sub_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @sub_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: sub_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = sub <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @sub_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @sub_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: sub_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: sub z0.b, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = sub <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @sub_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @sub_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: sub_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = sub <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @sub_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @sub_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sub_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = sub <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @sub_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @sub_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: sub_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = sub <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @sub_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @sub_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: sub_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: sub z0.h, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = sub <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @sub_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @sub_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: sub_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = sub <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @sub_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @sub_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sub_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = sub <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @sub_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @sub_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: sub_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = sub <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @sub_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @sub_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: sub_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: sub z0.s, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = sub <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @sub_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @sub_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: sub_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = sub <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @sub_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @sub_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sub_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = sub <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @sub_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @sub_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: sub_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = sub <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @sub_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @sub_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: sub_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: sub z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = sub <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @sub_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @sub_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: sub_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = sub <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @sub_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @sub_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sub_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = sub <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @abs_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define void @abs_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: abs_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: abs z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
+ %op1 = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %op1, i1 false)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @abs_v64i8(<64 x i8>* %a) #0 {
+define void @abs_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: abs_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: abs z0.b, p0/m, z0.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%res = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %op1, i1 false)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @abs_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define void @abs_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: abs_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: abs z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
+ %op1 = load <128 x i8>, ptr %a
%res = call <128 x i8> @llvm.abs.v128i8(<128 x i8> %op1, i1 false)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @abs_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define void @abs_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: abs_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: abs z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
+ %op1 = load <256 x i8>, ptr %a
%res = call <256 x i8> @llvm.abs.v256i8(<256 x i8> %op1, i1 false)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @abs_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define void @abs_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: abs_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: abs z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
+ %op1 = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %op1, i1 false)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @abs_v32i16(<32 x i16>* %a) vscale_range(2,0) #0 {
+define void @abs_v32i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: abs_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #16
; CHECK-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; CHECK-NEXT: st1h { z1.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%res = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %op1, i1 false)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @abs_v64i16(<64 x i16>* %a) vscale_range(2,0) #0 {
+define void @abs_v64i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: abs_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #48
; CHECK-NEXT: st1h { z1.h }, p0, [x0, x9, lsl #1]
; CHECK-NEXT: st1h { z3.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
+ %op1 = load <64 x i16>, ptr %a
%res = call <64 x i16> @llvm.abs.v64i16(<64 x i16> %op1, i1 false)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @abs_v128i16(<128 x i16>* %a) vscale_range(2,0) #0 {
+define void @abs_v128i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: abs_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #96
; CHECK-NEXT: st1h { z1.h }, p0, [x0, x10, lsl #1]
; CHECK-NEXT: st1h { z7.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
+ %op1 = load <128 x i16>, ptr %a
%res = call <128 x i16> @llvm.abs.v128i16(<128 x i16> %op1, i1 false)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @abs_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define void @abs_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: abs_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
+ %op1 = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @abs_v16i32(<16 x i32>* %a) #0 {
+define void @abs_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: abs_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: abs z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%res = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %op1, i1 false)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @abs_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define void @abs_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: abs_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
+ %op1 = load <32 x i32>, ptr %a
%res = call <32 x i32> @llvm.abs.v32i32(<32 x i32> %op1, i1 false)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @abs_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define void @abs_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: abs_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
+ %op1 = load <64 x i32>, ptr %a
%res = call <64 x i32> @llvm.abs.v64i32(<64 x i32> %op1, i1 false)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @abs_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define void @abs_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: abs_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
+ %op1 = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @abs_v8i64(<8 x i64>* %a) #0 {
+define void @abs_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: abs_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: abs z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%res = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %op1, i1 false)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @abs_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define void @abs_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: abs_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
+ %op1 = load <16 x i64>, ptr %a
%res = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %op1, i1 false)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @abs_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define void @abs_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: abs_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
+ %op1 = load <32 x i64>, ptr %a
%res = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %op1, i1 false)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %sext
}
-define void @icmp_eq_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @icmp_eq_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%cmp = icmp eq <32 x i8> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i8>
- store <32 x i8> %sext, <32 x i8>* %a
+ store <32 x i8> %sext, ptr %a
ret void
}
-define void @icmp_eq_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @icmp_eq_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%cmp = icmp eq <64 x i8> %op1, %op2
%sext = sext <64 x i1> %cmp to <64 x i8>
- store <64 x i8> %sext, <64 x i8>* %a
+ store <64 x i8> %sext, ptr %a
ret void
}
-define void @icmp_eq_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @icmp_eq_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%cmp = icmp eq <128 x i8> %op1, %op2
%sext = sext <128 x i1> %cmp to <128 x i8>
- store <128 x i8> %sext, <128 x i8>* %a
+ store <128 x i8> %sext, ptr %a
ret void
}
-define void @icmp_eq_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @icmp_eq_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%cmp = icmp eq <256 x i8> %op1, %op2
%sext = sext <256 x i1> %cmp to <256 x i8>
- store <256 x i8> %sext, <256 x i8>* %a
+ store <256 x i8> %sext, ptr %a
ret void
}
ret <8 x i16> %sext
}
-define void @icmp_eq_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @icmp_eq_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%cmp = icmp eq <16 x i16> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %a
+ store <16 x i16> %sext, ptr %a
ret void
}
-define void @icmp_eq_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @icmp_eq_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%cmp = icmp eq <32 x i16> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i16>
- store <32 x i16> %sext, <32 x i16>* %a
+ store <32 x i16> %sext, ptr %a
ret void
}
-define void @icmp_eq_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @icmp_eq_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%cmp = icmp eq <64 x i16> %op1, %op2
%sext = sext <64 x i1> %cmp to <64 x i16>
- store <64 x i16> %sext, <64 x i16>* %a
+ store <64 x i16> %sext, ptr %a
ret void
}
-define void @icmp_eq_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @icmp_eq_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%cmp = icmp eq <128 x i16> %op1, %op2
%sext = sext <128 x i1> %cmp to <128 x i16>
- store <128 x i16> %sext, <128 x i16>* %a
+ store <128 x i16> %sext, ptr %a
ret void
}
ret <4 x i32> %sext
}
-define void @icmp_eq_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @icmp_eq_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%cmp = icmp eq <8 x i32> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i32>
- store <8 x i32> %sext, <8 x i32>* %a
+ store <8 x i32> %sext, ptr %a
ret void
}
-define void @icmp_eq_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @icmp_eq_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%cmp = icmp eq <16 x i32> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i32>
- store <16 x i32> %sext, <16 x i32>* %a
+ store <16 x i32> %sext, ptr %a
ret void
}
-define void @icmp_eq_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @icmp_eq_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%cmp = icmp eq <32 x i32> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i32>
- store <32 x i32> %sext, <32 x i32>* %a
+ store <32 x i32> %sext, ptr %a
ret void
}
-define void @icmp_eq_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @icmp_eq_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%cmp = icmp eq <64 x i32> %op1, %op2
%sext = sext <64 x i1> %cmp to <64 x i32>
- store <64 x i32> %sext, <64 x i32>* %a
+ store <64 x i32> %sext, ptr %a
ret void
}
ret <2 x i64> %sext
}
-define void @icmp_eq_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @icmp_eq_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%cmp = icmp eq <4 x i64> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i64>
- store <4 x i64> %sext, <4 x i64>* %a
+ store <4 x i64> %sext, ptr %a
ret void
}
-define void @icmp_eq_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @icmp_eq_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%cmp = icmp eq <8 x i64> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i64>
- store <8 x i64> %sext, <8 x i64>* %a
+ store <8 x i64> %sext, ptr %a
ret void
}
-define void @icmp_eq_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @icmp_eq_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%cmp = icmp eq <16 x i64> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i64>
- store <16 x i64> %sext, <16 x i64>* %a
+ store <16 x i64> %sext, ptr %a
ret void
}
-define void @icmp_eq_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @icmp_eq_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%cmp = icmp eq <32 x i64> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i64>
- store <32 x i64> %sext, <32 x i64>* %a
+ store <32 x i64> %sext, ptr %a
ret void
}
; ICMP NE
;
-define void @icmp_ne_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @icmp_ne_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_ne_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%cmp = icmp ne <32 x i8> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i8>
- store <32 x i8> %sext, <32 x i8>* %a
+ store <32 x i8> %sext, ptr %a
ret void
}
; ICMP SGE
;
-define void @icmp_sge_v32i16(<32 x i16>* %a, <32 x i16>* %b) vscale_range(4,0) #0 {
+define void @icmp_sge_v32i16(ptr %a, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: icmp_sge_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%cmp = icmp sge <32 x i16> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i16>
- store <32 x i16> %sext, <32 x i16>* %a
+ store <32 x i16> %sext, ptr %a
ret void
}
; ICMP SGT
;
-define void @icmp_sgt_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @icmp_sgt_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_sgt_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%cmp = icmp sgt <16 x i16> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i16>
- store <16 x i16> %sext, <16 x i16>* %a
+ store <16 x i16> %sext, ptr %a
ret void
}
; ICMP SLE
;
-define void @icmp_sle_v16i32(<16 x i32>* %a, <16 x i32>* %b) vscale_range(4,0) #0 {
+define void @icmp_sle_v16i32(ptr %a, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: icmp_sle_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%cmp = icmp sle <16 x i32> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i32>
- store <16 x i32> %sext, <16 x i32>* %a
+ store <16 x i32> %sext, ptr %a
ret void
}
; ICMP SLT
;
-define void @icmp_slt_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @icmp_slt_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_slt_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%cmp = icmp slt <8 x i32> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i32>
- store <8 x i32> %sext, <8 x i32>* %a
+ store <8 x i32> %sext, ptr %a
ret void
}
; ICMP UGE
;
-define void @icmp_uge_v8i64(<8 x i64>* %a, <8 x i64>* %b) vscale_range(4,0) #0 {
+define void @icmp_uge_v8i64(ptr %a, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: icmp_uge_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%cmp = icmp uge <8 x i64> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i64>
- store <8 x i64> %sext, <8 x i64>* %a
+ store <8 x i64> %sext, ptr %a
ret void
}
; ICMP UGT
;
-define void @icmp_ugt_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @icmp_ugt_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_ugt_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%cmp = icmp ugt <4 x i64> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i64>
- store <4 x i64> %sext, <4 x i64>* %a
+ store <4 x i64> %sext, ptr %a
ret void
}
; ICMP ULE
;
-define void @icmp_ule_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @icmp_ule_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_ule_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%cmp = icmp ule <16 x i64> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i64>
- store <16 x i64> %sext, <16 x i64>* %a
+ store <16 x i64> %sext, ptr %a
ret void
}
; ICMP ULT
;
-define void @icmp_ult_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @icmp_ult_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_ult_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%cmp = icmp ult <32 x i64> %op1, %op2
%sext = sext <32 x i1> %cmp to <32 x i64>
- store <32 x i64> %sext, <32 x i64>* %a
+ store <32 x i64> %sext, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @sdiv_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(8,0) #0 {
+define void @sdiv_v32i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1b { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = sdiv <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @sdiv_v64i8(<64 x i8>* %a, <64 x i8>* %b) vscale_range(16,0) #0 {
+define void @sdiv_v64i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1b { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = sdiv <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @sdiv_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(16,0) #0 {
+define void @sdiv_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: st1b { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = sdiv <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @sdiv_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @sdiv_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = sdiv <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @sdiv_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
+define void @sdiv_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: sdiv_v16i16:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q1, [x1]
; VBITS_GE_512-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = sdiv <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @sdiv_v32i16(<32 x i16>* %a, <32 x i16>* %b) vscale_range(8,0) #0 {
+define void @sdiv_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = sdiv <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @sdiv_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(16,0) #0 {
+define void @sdiv_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = sdiv <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @sdiv_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @sdiv_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = sdiv <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @sdiv_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @sdiv_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = sdiv <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @sdiv_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @sdiv_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: sdiv_v16i32:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
; VBITS_GE_512-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = sdiv <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @sdiv_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @sdiv_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = sdiv <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @sdiv_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @sdiv_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = sdiv <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @sdiv_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @sdiv_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = sdiv <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @sdiv_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @sdiv_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: sdiv_v8i64:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
; VBITS_GE_512-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = sdiv <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @sdiv_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @sdiv_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = sdiv <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @sdiv_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @sdiv_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = sdiv <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @udiv_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(8,0) #0 {
+define void @udiv_v32i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: udiv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1b { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = udiv <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @udiv_v64i8(<64 x i8>* %a, <64 x i8>* %b) vscale_range(16,0) #0 {
+define void @udiv_v64i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: udiv_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1b { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = udiv <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @udiv_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(16,0) #0 {
+define void @udiv_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: udiv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT: st1b { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = udiv <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @udiv_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @udiv_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: udiv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = udiv <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @udiv_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
+define void @udiv_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: udiv_v16i16:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q1, [x1]
; VBITS_GE_512-NEXT: udivr z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = udiv <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @udiv_v32i16(<32 x i16>* %a, <32 x i16>* %b) vscale_range(8,0) #0 {
+define void @udiv_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: udiv_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = udiv <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @udiv_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(16,0) #0 {
+define void @udiv_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: udiv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = udiv <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @udiv_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @udiv_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: udiv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = udiv <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @udiv_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @udiv_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: udiv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = udiv <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @udiv_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @udiv_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: udiv_v16i32:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
; VBITS_GE_512-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = udiv <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @udiv_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @udiv_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: udiv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = udiv <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @udiv_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @udiv_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: udiv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = udiv <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @udiv_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @udiv_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: udiv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = udiv <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @udiv_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @udiv_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: udiv_v8i64:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
; VBITS_GE_512-NEXT: udiv z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = udiv <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @udiv_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @udiv_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: udiv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = udiv <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @udiv_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @udiv_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: udiv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = udiv <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
; This used to crash because isUnaryPredicate and BuildUDIV didn't know how
; a SPLAT_VECTOR of fixed-length vector type should be handled.
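; (Background, as an assumed sketch rather than something these checks verify:
; BuildUDIV normally rewrites a division by a constant splat, e.g. udiv by
; <i32 95, ...>, into a multiply-high by a precomputed magic constant plus
; shifts. When fixed-length vectors are lowered for SVE, the splat reaches
; these helpers as a SPLAT_VECTOR node rather than a BUILD_VECTOR, which they
; did not expect; the plain 'udiv' in the checks below is the code that
; formerly crashed.)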
-define void @udiv_constantsplat_v8i32(<8 x i32>* %a) vscale_range(2,0) #1 {
+define void @udiv_constantsplat_v8i32(ptr %a) vscale_range(2,0) #1 {
; CHECK-LABEL: udiv_constantsplat_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
+ %op1 = load <8 x i32>, ptr %a
%res = udiv <8 x i32> %op1, <i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95>
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
; NOTE: Covers the scenario where a SIGN_EXTEND_INREG is required, whose inreg
; type's element type is not byte-based and thus cannot be lowered directly to
; an SVE instruction.
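; (A rough sketch of the assumed expansion, not taken from the checks: the i1
; elements are first widened into 32-bit lanes, after which SIGN_EXTEND_INREG
; is expanded to a shift pair that replicates the sign bit, conceptually
;   %shl = shl <8 x i32> %widened, <i32 31, ...>
;   %res = ashr <8 x i32> %shl, <i32 31, ...>
; where %widened is a hypothetical any-extended value.)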
-define void @sext_v8i1_v8i32(<8 x i1> %a, <8 x i32>* %out) vscale_range(2,0) #0 {
+define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: sext_v8i1_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%b = sext <8 x i1> %a to <8 x i32>
- store <8 x i32> %b, <8 x i32>* %out
+ store <8 x i32> %b, ptr %out
ret void
}
; NOTE: Covers the scenario where a SIGN_EXTEND_INREG is required, whose inreg
; type's element type is not a power of two and thus cannot be lowered
; directly to an SVE instruction.
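; (By the same assumed shift-pair expansion sketched above, the i3 variant
; below would use a 64-bit container and a shift amount of 64 - 3 = 61,
; conceptually
;   %shl = shl <4 x i64> %widened, <i64 61, ...>
;   %res = ashr <4 x i64> %shl, <i64 61, ...>
; with %widened again a hypothetical any-extended value.)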
-define void @sext_v4i3_v4i64(<4 x i3> %a, <4 x i64>* %out) vscale_range(2,0) #0 {
+define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: sext_v4i3_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%b = sext <4 x i3> %a to <4 x i64>
- store <4 x i64> %b, <4 x i64>* %out
+ store <4 x i64> %b, ptr %out
ret void
}
; sext i8 -> i16
;
-define void @sext_v16i8_v16i16(<16 x i8> %a, <16 x i16>* %out) vscale_range(2,0) #0 {
+define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: sext_v16i8_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%b = sext <16 x i8> %a to <16 x i16>
- store <16 x i16>%b, <16 x i16>* %out
+ store <16 x i16> %b, ptr %out
ret void
}
; NOTE: The extra 'add' prevents the extend from being combined with the load.
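; (Without the 'add', the sign extend would be expected to fold into an SVE
; extending load instead (assumed shape: ld1sb { z0.h }, p0/z, [x0], which is
; not part of the checks here), leaving the unpack-based extend untested.)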
-define void @sext_v32i8_v32i16(<32 x i8>* %in, <32 x i16>* %out) #0 {
+define void @sext_v32i8_v32i16(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: sext_v32i8_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <32 x i8>, <32 x i8>* %in
+ %a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = sext <32 x i8> %b to <32 x i16>
- store <32 x i16> %c, <32 x i16>* %out
+ store <32 x i16> %c, ptr %out
ret void
}
-define void @sext_v64i8_v64i16(<64 x i8>* %in, <64 x i16>* %out) vscale_range(8,0) #0 {
+define void @sext_v64i8_v64i16(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: sext_v64i8_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i8>, <64 x i8>* %in
+ %a = load <64 x i8>, ptr %in
%b = add <64 x i8> %a, %a
%c = sext <64 x i8> %b to <64 x i16>
- store <64 x i16> %c, <64 x i16>* %out
+ store <64 x i16> %c, ptr %out
ret void
}
-define void @sext_v128i8_v128i16(<128 x i8>* %in, <128 x i16>* %out) vscale_range(16,0) #0 {
+define void @sext_v128i8_v128i16(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: sext_v128i8_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <128 x i8>, <128 x i8>* %in
+ %a = load <128 x i8>, ptr %in
%b = add <128 x i8> %a, %a
%c = sext <128 x i8> %b to <128 x i16>
- store <128 x i16> %c, <128 x i16>* %out
+ store <128 x i16> %c, ptr %out
ret void
}
; sext i8 -> i32
;
-define void @sext_v8i8_v8i32(<8 x i8> %a, <8 x i32>* %out) vscale_range(2,0) #0 {
+define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: sext_v8i8_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%b = sext <8 x i8> %a to <8 x i32>
- store <8 x i32>%b, <8 x i32>* %out
+ store <8 x i32> %b, ptr %out
ret void
}
-define void @sext_v16i8_v16i32(<16 x i8> %a, <16 x i32>* %out) #0 {
+define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
; VBITS_GE_256-LABEL: sext_v16i8_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%b = sext <16 x i8> %a to <16 x i32>
- store <16 x i32> %b, <16 x i32>* %out
+ store <16 x i32> %b, ptr %out
ret void
}
-define void @sext_v32i8_v32i32(<32 x i8>* %in, <32 x i32>* %out) vscale_range(8,0) #0 {
+define void @sext_v32i8_v32i32(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: sext_v32i8_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i8>, <32 x i8>* %in
+ %a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = sext <32 x i8> %b to <32 x i32>
- store <32 x i32> %c, <32 x i32>* %out
+ store <32 x i32> %c, ptr %out
ret void
}
-define void @sext_v64i8_v64i32(<64 x i8>* %in, <64 x i32>* %out) vscale_range(16,0) #0 {
+define void @sext_v64i8_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: sext_v64i8_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i8>, <64 x i8>* %in
+ %a = load <64 x i8>, ptr %in
%b = add <64 x i8> %a, %a
%c = sext <64 x i8> %b to <64 x i32>
- store <64 x i32> %c, <64 x i32>* %out
+ store <64 x i32> %c, ptr %out
ret void
}
; NOTE: v4i8 is an unpacked type stored within a v4i16 container. The sign
; extend is a two-step process where the container is any_extend'd with the
; result feeding an inreg sign extend.
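; (Conceptually, as a sketch rather than what the checks verify: the v4i16
; container is any-extended so each element sits in an i64 lane with the low
; 8 bits holding the value, then the inreg sign extend replicates bit 7 via a
; shift pair, e.g.
;   %shl = shl <4 x i64> %widened, <i64 56, ...>
;   %res = ashr <4 x i64> %shl, <i64 56, ...>
; where %widened is a hypothetical any-extended value.)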
-define void @sext_v4i8_v4i64(<4 x i8> %a, <4 x i64>* %out) vscale_range(2,0) #0 {
+define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: sext_v4i8_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%b = sext <4 x i8> %a to <4 x i64>
- store <4 x i64>%b, <4 x i64>* %out
+ store <4 x i64> %b, ptr %out
ret void
}
-define void @sext_v8i8_v8i64(<8 x i8> %a, <8 x i64>* %out) #0 {
+define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
; VBITS_GE_256-LABEL: sext_v8i8_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%b = sext <8 x i8> %a to <8 x i64>
- store <8 x i64>%b, <8 x i64>* %out
+ store <8 x i64> %b, ptr %out
ret void
}
-define void @sext_v16i8_v16i64(<16 x i8> %a, <16 x i64>* %out) vscale_range(8,0) #0 {
+define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: sext_v16i8_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%b = sext <16 x i8> %a to <16 x i64>
- store <16 x i64> %b, <16 x i64>* %out
+ store <16 x i64> %b, ptr %out
ret void
}
-define void @sext_v32i8_v32i64(<32 x i8>* %in, <32 x i64>* %out) vscale_range(16,0) #0 {
+define void @sext_v32i8_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: sext_v32i8_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i8>, <32 x i8>* %in
+ %a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = sext <32 x i8> %b to <32 x i64>
- store <32 x i64> %c, <32 x i64>* %out
+ store <32 x i64> %c, ptr %out
ret void
}
; sext i16 -> i32
;
-define void @sext_v8i16_v8i32(<8 x i16> %a, <8 x i32>* %out) vscale_range(2,0) #0 {
+define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: sext_v8i16_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%b = sext <8 x i16> %a to <8 x i32>
- store <8 x i32>%b, <8 x i32>* %out
+ store <8 x i32> %b, ptr %out
ret void
}
-define void @sext_v16i16_v16i32(<16 x i16>* %in, <16 x i32>* %out) #0 {
+define void @sext_v16i16_v16i32(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: sext_v16i16_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i16>, <16 x i16>* %in
+ %a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
%c = sext <16 x i16> %b to <16 x i32>
- store <16 x i32> %c, <16 x i32>* %out
+ store <16 x i32> %c, ptr %out
ret void
}
-define void @sext_v32i16_v32i32(<32 x i16>* %in, <32 x i32>* %out) vscale_range(8,0) #0 {
+define void @sext_v32i16_v32i32(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: sext_v32i16_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %in
+ %a = load <32 x i16>, ptr %in
%b = add <32 x i16> %a, %a
%c = sext <32 x i16> %b to <32 x i32>
- store <32 x i32> %c, <32 x i32>* %out
+ store <32 x i32> %c, ptr %out
ret void
}
-define void @sext_v64i16_v64i32(<64 x i16>* %in, <64 x i32>* %out) vscale_range(16,0) #0 {
+define void @sext_v64i16_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: sext_v64i16_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i16>, <64 x i16>* %in
+ %a = load <64 x i16>, ptr %in
%b = add <64 x i16> %a, %a
%c = sext <64 x i16> %b to <64 x i32>
- store <64 x i32> %c, <64 x i32>* %out
+ store <64 x i32> %c, ptr %out
ret void
}
; sext i16 -> i64
;
-define void @sext_v4i16_v4i64(<4 x i16> %a, <4 x i64>* %out) vscale_range(2,0) #0 {
+define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: sext_v4i16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%b = sext <4 x i16> %a to <4 x i64>
- store <4 x i64>%b, <4 x i64>* %out
+ store <4 x i64> %b, ptr %out
ret void
}
-define void @sext_v8i16_v8i64(<8 x i16> %a, <8 x i64>* %out) #0 {
+define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
; VBITS_GE_256-LABEL: sext_v8i16_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%b = sext <8 x i16> %a to <8 x i64>
- store <8 x i64>%b, <8 x i64>* %out
+ store <8 x i64> %b, ptr %out
ret void
}
-define void @sext_v16i16_v16i64(<16 x i16>* %in, <16 x i64>* %out) vscale_range(8,0) #0 {
+define void @sext_v16i16_v16i64(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: sext_v16i16_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x i16>, <16 x i16>* %in
+ %a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
%c = sext <16 x i16> %b to <16 x i64>
- store <16 x i64> %c, <16 x i64>* %out
+ store <16 x i64> %c, ptr %out
ret void
}
-define void @sext_v32i16_v32i64(<32 x i16>* %in, <32 x i64>* %out) vscale_range(16,0) #0 {
+define void @sext_v32i16_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: sext_v32i16_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %in
+ %a = load <32 x i16>, ptr %in
%b = add <32 x i16> %a, %a
%c = sext <32 x i16> %b to <32 x i64>
- store <32 x i64> %c, <32 x i64>* %out
+ store <32 x i64> %c, ptr %out
ret void
}
; sext i32 -> i64
;
-define void @sext_v4i32_v4i64(<4 x i32> %a, <4 x i64>* %out) vscale_range(2,0) #0 {
+define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: sext_v4i32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%b = sext <4 x i32> %a to <4 x i64>
- store <4 x i64>%b, <4 x i64>* %out
+ store <4 x i64> %b, ptr %out
ret void
}
-define void @sext_v8i32_v8i64(<8 x i32>* %in, <8 x i64>* %out) #0 {
+define void @sext_v8i32_v8i64(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: sext_v8i32_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i32>, <8 x i32>* %in
+ %a = load <8 x i32>, ptr %in
%b = add <8 x i32> %a, %a
%c = sext <8 x i32> %b to <8 x i64>
- store <8 x i64> %c, <8 x i64>* %out
+ store <8 x i64> %c, ptr %out
ret void
}
-define void @sext_v16i32_v16i64(<16 x i32>* %in, <16 x i64>* %out) vscale_range(8,0) #0 {
+define void @sext_v16i32_v16i64(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: sext_v16i32_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %in
+ %a = load <16 x i32>, ptr %in
%b = add <16 x i32> %a, %a
%c = sext <16 x i32> %b to <16 x i64>
- store <16 x i64> %c, <16 x i64>* %out
+ store <16 x i64> %c, ptr %out
ret void
}
-define void @sext_v32i32_v32i64(<32 x i32>* %in, <32 x i64>* %out) vscale_range(16,0) #0 {
+define void @sext_v32i32_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: sext_v32i32_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i32>, <32 x i32>* %in
+ %a = load <32 x i32>, ptr %in
%b = add <32 x i32> %a, %a
%c = sext <32 x i32> %b to <32 x i64>
- store <32 x i64> %c, <32 x i64>* %out
+ store <32 x i64> %c, ptr %out
ret void
}
; zext i8 -> i16
;
-define void @zext_v16i8_v16i16(<16 x i8> %a, <16 x i16>* %out) vscale_range(2,0) #0 {
+define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: zext_v16i8_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%b = zext <16 x i8> %a to <16 x i16>
- store <16 x i16>%b, <16 x i16>* %out
+ store <16 x i16> %b, ptr %out
ret void
}
; NOTE: The extra 'add' prevents the extend from being combined with the load.
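; For illustration (a hypothetical, unchecked sketch, not part of the test):
; without the add, a pattern such as
;   %a = load <32 x i8>, ptr %in
;   %c = zext <32 x i8> %a to <32 x i16>
; would normally fold into a single extending load, leaving the standalone
; extend untested.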
-define void @zext_v32i8_v32i16(<32 x i8>* %in, <32 x i16>* %out) #0 {
+define void @zext_v32i8_v32i16(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: zext_v32i8_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <32 x i8>, <32 x i8>* %in
+ %a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = zext <32 x i8> %b to <32 x i16>
- store <32 x i16> %c, <32 x i16>* %out
+ store <32 x i16> %c, ptr %out
ret void
}
-define void @zext_v64i8_v64i16(<64 x i8>* %in, <64 x i16>* %out) vscale_range(8,0) #0 {
+define void @zext_v64i8_v64i16(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: zext_v64i8_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i8>, <64 x i8>* %in
+ %a = load <64 x i8>, ptr %in
%b = add <64 x i8> %a, %a
%c = zext <64 x i8> %b to <64 x i16>
- store <64 x i16> %c, <64 x i16>* %out
+ store <64 x i16> %c, ptr %out
ret void
}
-define void @zext_v128i8_v128i16(<128 x i8>* %in, <128 x i16>* %out) vscale_range(16,0) #0 {
+define void @zext_v128i8_v128i16(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: zext_v128i8_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <128 x i8>, <128 x i8>* %in
+ %a = load <128 x i8>, ptr %in
%b = add <128 x i8> %a, %a
%c = zext <128 x i8> %b to <128 x i16>
- store <128 x i16> %c, <128 x i16>* %out
+ store <128 x i16> %c, ptr %out
ret void
}
; zext i8 -> i32
;
-define void @zext_v8i8_v8i32(<8 x i8> %a, <8 x i32>* %out) vscale_range(2,0) #0 {
+define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: zext_v8i8_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%b = zext <8 x i8> %a to <8 x i32>
- store <8 x i32>%b, <8 x i32>* %out
+ store <8 x i32> %b, ptr %out
ret void
}
-define void @zext_v16i8_v16i32(<16 x i8> %a, <16 x i32>* %out) #0 {
+define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
; VBITS_GE_256-LABEL: zext_v16i8_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%b = zext <16 x i8> %a to <16 x i32>
- store <16 x i32> %b, <16 x i32>* %out
+ store <16 x i32> %b, ptr %out
ret void
}
-define void @zext_v32i8_v32i32(<32 x i8>* %in, <32 x i32>* %out) vscale_range(8,0) #0 {
+define void @zext_v32i8_v32i32(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: zext_v32i8_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i8>, <32 x i8>* %in
+ %a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = zext <32 x i8> %b to <32 x i32>
- store <32 x i32> %c, <32 x i32>* %out
+ store <32 x i32> %c, ptr %out
ret void
}
-define void @zext_v64i8_v64i32(<64 x i8>* %in, <64 x i32>* %out) vscale_range(16,0) #0 {
+define void @zext_v64i8_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: zext_v64i8_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i8>, <64 x i8>* %in
+ %a = load <64 x i8>, ptr %in
%b = add <64 x i8> %a, %a
%c = zext <64 x i8> %b to <64 x i32>
- store <64 x i32> %c, <64 x i32>* %out
+ store <64 x i32> %c, ptr %out
ret void
}
; zext i8 -> i64
;
; NOTE: v4i8 is an unpacked type stored within a v4i16 container. The zero
; extend is a two-step process: the container is first zero_extend_inreg'd,
; with the result feeding a normal zero extend from halves to doublewords.
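; Roughly, the two steps look like this (an illustrative sketch, not the
; elided CHECK output):
;   and z0.h, z0.h, #0xff    // zero_extend_inreg within the v4i16 container
;   uunpklo z0.s, z0.h       // normal zero extend: halves -> words
;   uunpklo z0.d, z0.s       // then words -> doublewords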
-define void @zext_v4i8_v4i64(<4 x i8> %a, <4 x i64>* %out) vscale_range(2,0) #0 {
+define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: zext_v4i8_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%b = zext <4 x i8> %a to <4 x i64>
- store <4 x i64>%b, <4 x i64>* %out
+ store <4 x i64> %b, ptr %out
ret void
}
-define void @zext_v8i8_v8i64(<8 x i8> %a, <8 x i64>* %out) #0 {
+define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
; VBITS_GE_256-LABEL: zext_v8i8_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%b = zext <8 x i8> %a to <8 x i64>
- store <8 x i64>%b, <8 x i64>* %out
+ store <8 x i64> %b, ptr %out
ret void
}
-define void @zext_v16i8_v16i64(<16 x i8> %a, <16 x i64>* %out) vscale_range(8,0) #0 {
+define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: zext_v16i8_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%b = zext <16 x i8> %a to <16 x i64>
- store <16 x i64> %b, <16 x i64>* %out
+ store <16 x i64> %b, ptr %out
ret void
}
-define void @zext_v32i8_v32i64(<32 x i8>* %in, <32 x i64>* %out) vscale_range(16,0) #0 {
+define void @zext_v32i8_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: zext_v32i8_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i8>, <32 x i8>* %in
+ %a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = zext <32 x i8> %b to <32 x i64>
- store <32 x i64> %c, <32 x i64>* %out
+ store <32 x i64> %c, ptr %out
ret void
}
; zext i16 -> i32
;
-define void @zext_v8i16_v8i32(<8 x i16> %a, <8 x i32>* %out) vscale_range(2,0) #0 {
+define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: zext_v8i16_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%b = zext <8 x i16> %a to <8 x i32>
- store <8 x i32>%b, <8 x i32>* %out
+ store <8 x i32> %b, ptr %out
ret void
}
-define void @zext_v16i16_v16i32(<16 x i16>* %in, <16 x i32>* %out) #0 {
+define void @zext_v16i16_v16i32(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: zext_v16i16_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i16>, <16 x i16>* %in
+ %a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
%c = zext <16 x i16> %b to <16 x i32>
- store <16 x i32> %c, <16 x i32>* %out
+ store <16 x i32> %c, ptr %out
ret void
}
-define void @zext_v32i16_v32i32(<32 x i16>* %in, <32 x i32>* %out) vscale_range(8,0) #0 {
+define void @zext_v32i16_v32i32(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: zext_v32i16_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %in
+ %a = load <32 x i16>, ptr %in
%b = add <32 x i16> %a, %a
%c = zext <32 x i16> %b to <32 x i32>
- store <32 x i32> %c, <32 x i32>* %out
+ store <32 x i32> %c, ptr %out
ret void
}
-define void @zext_v64i16_v64i32(<64 x i16>* %in, <64 x i32>* %out) vscale_range(16,0) #0 {
+define void @zext_v64i16_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: zext_v64i16_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i16>, <64 x i16>* %in
+ %a = load <64 x i16>, ptr %in
%b = add <64 x i16> %a, %a
%c = zext <64 x i16> %b to <64 x i32>
- store <64 x i32> %c, <64 x i32>* %out
+ store <64 x i32> %c, ptr %out
ret void
}
; zext i16 -> i64
;
-define void @zext_v4i16_v4i64(<4 x i16> %a, <4 x i64>* %out) vscale_range(2,0) #0 {
+define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: zext_v4i16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%b = zext <4 x i16> %a to <4 x i64>
- store <4 x i64>%b, <4 x i64>* %out
+ store <4 x i64> %b, ptr %out
ret void
}
-define void @zext_v8i16_v8i64(<8 x i16> %a, <8 x i64>* %out) #0 {
+define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
; VBITS_GE_256-LABEL: zext_v8i16_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%b = zext <8 x i16> %a to <8 x i64>
- store <8 x i64>%b, <8 x i64>* %out
+ store <8 x i64> %b, ptr %out
ret void
}
-define void @zext_v16i16_v16i64(<16 x i16>* %in, <16 x i64>* %out) vscale_range(8,0) #0 {
+define void @zext_v16i16_v16i64(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: zext_v16i16_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x i16>, <16 x i16>* %in
+ %a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
%c = zext <16 x i16> %b to <16 x i64>
- store <16 x i64> %c, <16 x i64>* %out
+ store <16 x i64> %c, ptr %out
ret void
}
-define void @zext_v32i16_v32i64(<32 x i16>* %in, <32 x i64>* %out) vscale_range(16,0) #0 {
+define void @zext_v32i16_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: zext_v32i16_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %in
+ %a = load <32 x i16>, ptr %in
%b = add <32 x i16> %a, %a
%c = zext <32 x i16> %b to <32 x i64>
- store <32 x i64> %c, <32 x i64>* %out
+ store <32 x i64> %c, ptr %out
ret void
}
; zext i32 -> i64
;
-define void @zext_v4i32_v4i64(<4 x i32> %a, <4 x i64>* %out) vscale_range(2,0) #0 {
+define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: zext_v4i32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%b = zext <4 x i32> %a to <4 x i64>
- store <4 x i64>%b, <4 x i64>* %out
+ store <4 x i64> %b, ptr %out
ret void
}
-define void @zext_v8i32_v8i64(<8 x i32>* %in, <8 x i64>* %out) #0 {
+define void @zext_v8i32_v8i64(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: zext_v8i32_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i32>, <8 x i32>* %in
+ %a = load <8 x i32>, ptr %in
%b = add <8 x i32> %a, %a
%c = zext <8 x i32> %b to <8 x i64>
- store <8 x i64> %c, <8 x i64>* %out
+ store <8 x i64> %c, ptr %out
ret void
}
-define void @zext_v16i32_v16i64(<16 x i32>* %in, <16 x i64>* %out) vscale_range(8,0) #0 {
+define void @zext_v16i32_v16i64(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: zext_v16i32_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %in
+ %a = load <16 x i32>, ptr %in
%b = add <16 x i32> %a, %a
%c = zext <16 x i32> %b to <16 x i64>
- store <16 x i64> %c, <16 x i64>* %out
+ store <16 x i64> %c, ptr %out
ret void
}
-define void @zext_v32i32_v32i64(<32 x i32>* %in, <32 x i64>* %out) vscale_range(16,0) #0 {
+define void @zext_v32i32_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: zext_v32i32_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i32>, <32 x i32>* %in
+ %a = load <32 x i32>, ptr %in
%b = add <32 x i32> %a, %a
%c = zext <32 x i32> %b to <32 x i64>
- store <32 x i64> %c, <32 x i64>* %out
+ store <32 x i64> %c, ptr %out
ret void
}
; ADD
;
-define void @add_v64i8(<64 x i8>* %a) #0 {
+define void @add_v64i8(ptr %a) #0 {
; CHECK-LABEL: add_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = add <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @add_v32i16(<32 x i16>* %a) #0 {
+define void @add_v32i16(ptr %a) #0 {
; CHECK-LABEL: add_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: add z0.h, z0.h, #15 // =0xf
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = add <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @add_v16i32(<16 x i32>* %a) #0 {
+define void @add_v16i32(ptr %a) #0 {
; CHECK-LABEL: add_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: add z0.s, z0.s, #31 // =0x1f
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = add <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @add_v8i64(<8 x i64>* %a) #0 {
+define void @add_v8i64(ptr %a) #0 {
; CHECK-LABEL: add_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: add z0.d, z0.d, #63 // =0x3f
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = add <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; AND
;
-define void @and_v64i8(<64 x i8>* %a) #0 {
+define void @and_v64i8(ptr %a) #0 {
; CHECK-LABEL: and_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: and z0.b, z0.b, #0x7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = and <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @and_v32i16(<32 x i16>* %a) #0 {
+define void @and_v32i16(ptr %a) #0 {
; CHECK-LABEL: and_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: and z0.h, z0.h, #0xf
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = and <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @and_v16i32(<16 x i32>* %a) #0 {
+define void @and_v16i32(ptr %a) #0 {
; CHECK-LABEL: and_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: and z0.s, z0.s, #0x1f
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = and <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @and_v8i64(<8 x i64>* %a) #0 {
+define void @and_v8i64(ptr %a) #0 {
; CHECK-LABEL: and_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: and z0.d, z0.d, #0x3f
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = and <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; ASHR
;
-define void @ashr_v64i8(<64 x i8>* %a) #0 {
+define void @ashr_v64i8(ptr %a) #0 {
; CHECK-LABEL: ashr_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = ashr <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @ashr_v32i16(<32 x i16>* %a) #0 {
+define void @ashr_v32i16(ptr %a) #0 {
; CHECK-LABEL: ashr_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = ashr <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @ashr_v16i32(<16 x i32>* %a) #0 {
+define void @ashr_v16i32(ptr %a) #0 {
; CHECK-LABEL: ashr_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = ashr <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @ashr_v8i64(<8 x i64>* %a) #0 {
+define void @ashr_v8i64(ptr %a) #0 {
; CHECK-LABEL: ashr_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = ashr <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; ICMP
;
-define void @icmp_eq_v64i8(<64 x i8>* %a) #0 {
+define void @icmp_eq_v64i8(ptr %a) #0 {
; CHECK-LABEL: icmp_eq_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%cmp = icmp eq <64 x i8> %op1, %op2
%res = sext <64 x i1> %cmp to <64 x i8>
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @icmp_sge_v32i16(<32 x i16>* %a) #0 {
+define void @icmp_sge_v32i16(ptr %a) #0 {
; CHECK-LABEL: icmp_sge_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%cmp = icmp sge <32 x i16> %op1, %op2
%res = sext <32 x i1> %cmp to <32 x i16>
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @icmp_sgt_v16i32(<16 x i32>* %a) #0 {
+define void @icmp_sgt_v16i32(ptr %a) #0 {
; CHECK-LABEL: icmp_sgt_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 -16, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%cmp = icmp sgt <16 x i32> %op1, %op2
%res = sext <16 x i1> %cmp to <16 x i32>
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @icmp_ult_v8i64(<8 x i64>* %a) #0 {
+define void @icmp_ult_v8i64(ptr %a) #0 {
; CHECK-LABEL: icmp_ult_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%cmp = icmp ult <8 x i64> %op1, %op2
%res = sext <8 x i1> %cmp to <8 x i64>
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; LSHR
;
-define void @lshr_v64i8(<64 x i8>* %a) #0 {
+define void @lshr_v64i8(ptr %a) #0 {
; CHECK-LABEL: lshr_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = lshr <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @lshr_v32i16(<32 x i16>* %a) #0 {
+define void @lshr_v32i16(ptr %a) #0 {
; CHECK-LABEL: lshr_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = lshr <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @lshr_v16i32(<16 x i32>* %a) #0 {
+define void @lshr_v16i32(ptr %a) #0 {
; CHECK-LABEL: lshr_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #31
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = lshr <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @lshr_v8i64(<8 x i64>* %a) #0 {
+define void @lshr_v8i64(ptr %a) #0 {
; CHECK-LABEL: lshr_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #63
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = lshr <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; MUL
;
-define void @mul_v64i8(<64 x i8>* %a) #0 {
+define void @mul_v64i8(ptr %a) #0 {
; CHECK-LABEL: mul_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = mul <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @mul_v32i16(<32 x i16>* %a) #0 {
+define void @mul_v32i16(ptr %a) #0 {
; CHECK-LABEL: mul_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = mul <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @mul_v16i32(<16 x i32>* %a) #0 {
+define void @mul_v16i32(ptr %a) #0 {
; CHECK-LABEL: mul_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = mul <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @mul_v8i64(<8 x i64>* %a) #0 {
+define void @mul_v8i64(ptr %a) #0 {
; CHECK-LABEL: mul_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = mul <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; OR
;
-define void @or_v64i8(<64 x i8>* %a) #0 {
+define void @or_v64i8(ptr %a) #0 {
; CHECK-LABEL: or_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: orr z0.b, z0.b, #0x7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = or <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @or_v32i16(<32 x i16>* %a) #0 {
+define void @or_v32i16(ptr %a) #0 {
; CHECK-LABEL: or_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: orr z0.h, z0.h, #0xf
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = or <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @or_v16i32(<16 x i32>* %a) #0 {
+define void @or_v16i32(ptr %a) #0 {
; CHECK-LABEL: or_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: orr z0.s, z0.s, #0x1f
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = or <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @or_v8i64(<8 x i64>* %a) #0 {
+define void @or_v8i64(ptr %a) #0 {
; CHECK-LABEL: or_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: orr z0.d, z0.d, #0x3f
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = or <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; SHL
;
-define void @shl_v64i8(<64 x i8>* %a) #0 {
+define void @shl_v64i8(ptr %a) #0 {
; CHECK-LABEL: shl_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = shl <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @shl_v32i16(<32 x i16>* %a) #0 {
+define void @shl_v32i16(ptr %a) #0 {
; CHECK-LABEL: shl_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = shl <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @shl_v16i32(<16 x i32>* %a) #0 {
+define void @shl_v16i32(ptr %a) #0 {
; CHECK-LABEL: shl_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = shl <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @shl_v8i64(<8 x i64>* %a) #0 {
+define void @shl_v8i64(ptr %a) #0 {
; CHECK-LABEL: shl_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = shl <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; SMAX
;
-define void @smax_v64i8(<64 x i8>* %a) #0 {
+define void @smax_v64i8(ptr %a) #0 {
; CHECK-LABEL: smax_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @smax_v32i16(<32 x i16>* %a) #0 {
+define void @smax_v32i16(ptr %a) #0 {
; CHECK-LABEL: smax_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @smax_v16i32(<16 x i32>* %a) #0 {
+define void @smax_v16i32(ptr %a) #0 {
; CHECK-LABEL: smax_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @smax_v8i64(<8 x i64>* %a) #0 {
+define void @smax_v8i64(ptr %a) #0 {
; CHECK-LABEL: smax_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; SMIN
;
-define void @smin_v64i8(<64 x i8>* %a) #0 {
+define void @smin_v64i8(ptr %a) #0 {
; CHECK-LABEL: smin_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @smin_v32i16(<32 x i16>* %a) #0 {
+define void @smin_v32i16(ptr %a) #0 {
; CHECK-LABEL: smin_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @smin_v16i32(<16 x i32>* %a) #0 {
+define void @smin_v16i32(ptr %a) #0 {
; CHECK-LABEL: smin_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @smin_v8i64(<8 x i64>* %a) #0 {
+define void @smin_v8i64(ptr %a) #0 {
; CHECK-LABEL: smin_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; SUB
;
-define void @sub_v64i8(<64 x i8>* %a) #0 {
+define void @sub_v64i8(ptr %a) #0 {
; CHECK-LABEL: sub_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: sub z0.b, z0.b, #7 // =0x7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = sub <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @sub_v32i16(<32 x i16>* %a) #0 {
+define void @sub_v32i16(ptr %a) #0 {
; CHECK-LABEL: sub_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: sub z0.h, z0.h, #15 // =0xf
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = sub <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @sub_v16i32(<16 x i32>* %a) #0 {
+define void @sub_v16i32(ptr %a) #0 {
; CHECK-LABEL: sub_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: sub z0.s, z0.s, #31 // =0x1f
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = sub <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @sub_v8i64(<8 x i64>* %a) #0 {
+define void @sub_v8i64(ptr %a) #0 {
; CHECK-LABEL: sub_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: sub z0.d, z0.d, #63 // =0x3f
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = sub <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; UMAX
;
-define void @umax_v64i8(<64 x i8>* %a) #0 {
+define void @umax_v64i8(ptr %a) #0 {
; CHECK-LABEL: umax_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @umax_v32i16(<32 x i16>* %a) #0 {
+define void @umax_v32i16(ptr %a) #0 {
; CHECK-LABEL: umax_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @umax_v16i32(<16 x i32>* %a) #0 {
+define void @umax_v16i32(ptr %a) #0 {
; CHECK-LABEL: umax_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @umax_v8i64(<8 x i64>* %a) #0 {
+define void @umax_v8i64(ptr %a) #0 {
; CHECK-LABEL: umax_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; UMIN
;
-define void @umin_v64i8(<64 x i8>* %a) #0 {
+define void @umin_v64i8(ptr %a) #0 {
; CHECK-LABEL: umin_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @umin_v32i16(<32 x i16>* %a) #0 {
+define void @umin_v32i16(ptr %a) #0 {
; CHECK-LABEL: umin_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @umin_v16i32(<16 x i32>* %a) #0 {
+define void @umin_v16i32(ptr %a) #0 {
; CHECK-LABEL: umin_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @umin_v8i64(<8 x i64>* %a) #0 {
+define void @umin_v8i64(ptr %a) #0 {
; CHECK-LABEL: umin_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
; XOR
;
-define void @xor_v64i8(<64 x i8>* %a) #0 {
+define void @xor_v64i8(ptr %a) #0 {
; CHECK-LABEL: xor_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: eor z0.b, z0.b, #0x7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%ins = insertelement <64 x i8> undef, i8 7, i64 0
%op2 = shufflevector <64 x i8> %ins, <64 x i8> undef, <64 x i32> zeroinitializer
%res = xor <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @xor_v32i16(<32 x i16>* %a) #0 {
+define void @xor_v32i16(ptr %a) #0 {
; CHECK-LABEL: xor_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: eor z0.h, z0.h, #0xf
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%ins = insertelement <32 x i16> undef, i16 15, i64 0
%op2 = shufflevector <32 x i16> %ins, <32 x i16> undef, <32 x i32> zeroinitializer
%res = xor <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @xor_v16i32(<16 x i32>* %a) #0 {
+define void @xor_v16i32(ptr %a) #0 {
; CHECK-LABEL: xor_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: eor z0.s, z0.s, #0x1f
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%ins = insertelement <16 x i32> undef, i32 31, i64 0
%op2 = shufflevector <16 x i32> %ins, <16 x i32> undef, <16 x i32> zeroinitializer
%res = xor <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @xor_v8i64(<8 x i64>* %a) #0 {
+define void @xor_v8i64(ptr %a) #0 {
; CHECK-LABEL: xor_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: eor z0.d, z0.d, #0x3f
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%ins = insertelement <8 x i64> undef, i64 63, i64 0
%op2 = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
%res = xor <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @and_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @and_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = and <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @and_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @and_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = and <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @and_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @and_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = and <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @and_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @and_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = and <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @and_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @and_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = and <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @and_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @and_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = and <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @and_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @and_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = and <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @and_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @and_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = and <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @and_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @and_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = and <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @and_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @and_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = and <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @and_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @and_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = and <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @and_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @and_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = and <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @and_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @and_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = and <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @and_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @and_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = and <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @and_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @and_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = and <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @and_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @and_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = and <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
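; Bitwise OR. As with the AND tests above, the expected lowering is the
; unpredicated, element-size-agnostic form (orr z0.d, z0.d, z1.d); only the
; governing predicate and the load/store element size vary with the type.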
-define void @or_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @or_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = or <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @or_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @or_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = or <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @or_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @or_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = or <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @or_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @or_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = or <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @or_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @or_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = or <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @or_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @or_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = or <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @or_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @or_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = or <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @or_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @or_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = or <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @or_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @or_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = or <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @or_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @or_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = or <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @or_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @or_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = or <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @or_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @or_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = or <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @or_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @or_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = or <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @or_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @or_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = or <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @or_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @or_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = or <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @or_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @or_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = or <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
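; Bitwise XOR, expected to lower to the unpredicated EOR; the structure
; mirrors the AND and OR tests above.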
-define void @xor_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @xor_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = xor <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @xor_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @xor_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = xor <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @xor_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @xor_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = xor <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @xor_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @xor_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = xor <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @xor_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @xor_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = xor <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @xor_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @xor_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = xor <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @xor_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @xor_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = xor <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @xor_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @xor_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = xor <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @xor_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @xor_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = xor <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @xor_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @xor_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = xor <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @xor_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @xor_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = xor <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @xor_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @xor_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = xor <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @xor_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @xor_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = xor <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @xor_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @xor_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = xor <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @xor_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @xor_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = xor <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @xor_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @xor_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = xor <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
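; Signed maximum. Unlike the bitwise ops, the checks here expect the merging
; predicated form (smax z0.b, p0/m, z0.b, z1.b), with the element size
; matching the vector element type.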
-define void @smax_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @smax_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smax_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @smax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @smax_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smax_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: smax z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @smax_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @smax_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smax_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = call <128 x i8> @llvm.smax.v128i8(<128 x i8> %op1, <128 x i8> %op2)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @smax_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @smax_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smax_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = call <256 x i8> @llvm.smax.v256i8(<256 x i8> %op1, <256 x i8> %op2)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @smax_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @smax_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smax_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @smax_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @smax_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smax_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: smax z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @smax_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @smax_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smax_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = call <64 x i16> @llvm.smax.v64i16(<64 x i16> %op1, <64 x i16> %op2)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @smax_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @smax_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smax_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = call <128 x i16> @llvm.smax.v128i16(<128 x i16> %op1, <128 x i16> %op2)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @smax_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @smax_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smax_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @smax_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @smax_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smax_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: smax z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @smax_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @smax_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smax_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = call <32 x i32> @llvm.smax.v32i32(<32 x i32> %op1, <32 x i32> %op2)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @smax_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @smax_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smax_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = call <64 x i32> @llvm.smax.v64i32(<64 x i32> %op1, <64 x i32> %op2)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @smax_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @smax_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smax_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @smax_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @smax_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smax_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: smax z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @smax_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @smax_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smax_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = call <16 x i64> @llvm.smax.v16i64(<16 x i64> %op1, <16 x i64> %op2)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @smax_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @smax_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smax_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = call <32 x i64> @llvm.smax.v32i64(<32 x i64> %op1, <32 x i64> %op2)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
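; Signed minimum, following the same merging predicated pattern as SMAX.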
-define void @smin_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @smin_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smin_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @smin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @smin_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smin_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: smin z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @smin_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @smin_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smin_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = call <128 x i8> @llvm.smin.v128i8(<128 x i8> %op1, <128 x i8> %op2)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @smin_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @smin_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smin_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = call <256 x i8> @llvm.smin.v256i8(<256 x i8> %op1, <256 x i8> %op2)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @smin_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @smin_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smin_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @smin_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @smin_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smin_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: smin z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @smin_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @smin_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smin_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = call <64 x i16> @llvm.smin.v64i16(<64 x i16> %op1, <64 x i16> %op2)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @smin_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @smin_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smin_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = call <128 x i16> @llvm.smin.v128i16(<128 x i16> %op1, <128 x i16> %op2)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @smin_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @smin_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smin_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @smin_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @smin_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smin_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: smin z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @smin_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @smin_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smin_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = call <32 x i32> @llvm.smin.v32i32(<32 x i32> %op1, <32 x i32> %op2)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @smin_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @smin_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smin_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = call <64 x i32> @llvm.smin.v64i32(<64 x i32> %op1, <64 x i32> %op2)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @smin_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @smin_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smin_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @smin_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @smin_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smin_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: smin z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @smin_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @smin_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smin_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = call <16 x i64> @llvm.smin.v16i64(<16 x i64> %op1, <16 x i64> %op2)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @smin_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @smin_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smin_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = call <32 x i64> @llvm.smin.v32i64(<32 x i64> %op1, <32 x i64> %op2)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
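; Unsigned maximum, again via the merging predicated form.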
-define void @umax_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @umax_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umax_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @umax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @umax_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umax_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: umax z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @umax_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @umax_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umax_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = call <128 x i8> @llvm.umax.v128i8(<128 x i8> %op1, <128 x i8> %op2)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @umax_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @umax_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umax_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = call <256 x i8> @llvm.umax.v256i8(<256 x i8> %op1, <256 x i8> %op2)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @umax_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @umax_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umax_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @umax_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @umax_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umax_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: umax z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @umax_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @umax_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umax_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = call <64 x i16> @llvm.umax.v64i16(<64 x i16> %op1, <64 x i16> %op2)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @umax_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @umax_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umax_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = call <128 x i16> @llvm.umax.v128i16(<128 x i16> %op1, <128 x i16> %op2)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @umax_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @umax_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umax_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @umax_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @umax_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umax_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: umax z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @umax_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @umax_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umax_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = call <32 x i32> @llvm.umax.v32i32(<32 x i32> %op1, <32 x i32> %op2)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @umax_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @umax_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umax_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = call <64 x i32> @llvm.umax.v64i32(<64 x i32> %op1, <64 x i32> %op2)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @umax_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @umax_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umax_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @umax_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @umax_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umax_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: umax z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @umax_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @umax_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umax_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = call <16 x i64> @llvm.umax.v16i64(<16 x i64> %op1, <16 x i64> %op2)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @umax_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @umax_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umax_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = call <32 x i64> @llvm.umax.v32i64(<32 x i64> %op1, <32 x i64> %op2)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
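; Unsigned minimum completes the min/max set, using the same predicated
; pattern.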
-define void @umin_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @umin_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umin_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @umin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @umin_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umin_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: umin z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @umin_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @umin_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umin_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = call <128 x i8> @llvm.umin.v128i8(<128 x i8> %op1, <128 x i8> %op2)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @umin_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @umin_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umin_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = call <256 x i8> @llvm.umin.v256i8(<256 x i8> %op1, <256 x i8> %op2)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @umin_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @umin_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umin_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @umin_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @umin_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umin_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: umin z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @umin_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @umin_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umin_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = call <64 x i16> @llvm.umin.v64i16(<64 x i16> %op1, <64 x i16> %op2)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @umin_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @umin_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umin_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = call <128 x i16> @llvm.umin.v128i16(<128 x i16> %op1, <128 x i16> %op2)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @umin_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @umin_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umin_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @umin_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @umin_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umin_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: umin z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @umin_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @umin_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umin_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = call <32 x i32> @llvm.umin.v32i32(<32 x i32> %op1, <32 x i32> %op2)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @umin_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @umin_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umin_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = call <64 x i32> @llvm.umin.v64i32(<64 x i32> %op1, <64 x i32> %op2)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @umin_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @umin_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umin_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @umin_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @umin_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umin_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: umin z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @umin_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @umin_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umin_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = call <16 x i64> @llvm.umin.v16i64(<16 x i64> %op1, <16 x i64> %op2)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @umin_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @umin_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umin_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = call <32 x i64> @llvm.umin.v32i64(<32 x i64> %op1, <32 x i64> %op2)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
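;
; SMULH: each test sign-extends the operands to double width, multiplies,
; shifts right by the source element width, and truncates; this idiom is
; expected to lower to a single predicated smulh instruction.
;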
-define void @smulh_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @smulh_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smulh_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%1 = sext <32 x i8> %op1 to <32 x i16>
%2 = sext <32 x i8> %op2 to <32 x i16>
%mul = mul <32 x i16> %1, %2
%shr = lshr <32 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%res = trunc <32 x i16> %shr to <32 x i8>
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @smulh_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @smulh_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smulh_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%1 = sext <64 x i8> %op1 to <64 x i16>
%2 = sext <64 x i8> %op2 to <64 x i16>
%mul = mul <64 x i16> %1, %2
%shr = lshr <64 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%res = trunc <64 x i16> %shr to <64 x i8>
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @smulh_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @smulh_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smulh_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%1 = sext <128 x i8> %op1 to <128 x i16>
%2 = sext <128 x i8> %op2 to <128 x i16>
%mul = mul <128 x i16> %1, %2
%shr = lshr <128 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%res = trunc <128 x i16> %shr to <128 x i8>
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @smulh_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @smulh_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smulh_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%1 = sext <256 x i8> %op1 to <256 x i16>
%2 = sext <256 x i8> %op2 to <256 x i16>
%mul = mul <256 x i16> %1, %2
%shr = lshr <256 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%res = trunc <256 x i16> %shr to <256 x i8>
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @smulh_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @smulh_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smulh_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%1 = sext <16 x i16> %op1 to <16 x i32>
%2 = sext <16 x i16> %op2 to <16 x i32>
%mul = mul <16 x i32> %1, %2
%shr = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%res = trunc <16 x i32> %shr to <16 x i16>
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @smulh_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @smulh_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smulh_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%1 = sext <32 x i16> %op1 to <32 x i32>
%2 = sext <32 x i16> %op2 to <32 x i32>
%mul = mul <32 x i32> %1, %2
%shr = lshr <32 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%res = trunc <32 x i32> %shr to <32 x i16>
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @smulh_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @smulh_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smulh_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%1 = sext <64 x i16> %op1 to <64 x i32>
%2 = sext <64 x i16> %op2 to <64 x i32>
%mul = mul <64 x i32> %1, %2
%shr = lshr <64 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%res = trunc <64 x i32> %shr to <64 x i16>
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @smulh_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @smulh_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smulh_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%1 = sext <128 x i16> %op1 to <128 x i32>
%2 = sext <128 x i16> %op2 to <128 x i32>
%mul = mul <128 x i32> %1, %2
%shr = lshr <128 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%res = trunc <128 x i32> %shr to <128 x i16>
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @smulh_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @smulh_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smulh_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%1 = sext <8 x i32> %op1 to <8 x i64>
%2 = sext <8 x i32> %op2 to <8 x i64>
%mul = mul <8 x i64> %1, %2
%shr = lshr <8 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%res = trunc <8 x i64> %shr to <8 x i32>
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @smulh_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @smulh_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smulh_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%1 = sext <16 x i32> %op1 to <16 x i64>
%2 = sext <16 x i32> %op2 to <16 x i64>
%mul = mul <16 x i64> %1, %2
%shr = lshr <16 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%res = trunc <16 x i64> %shr to <16 x i32>
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @smulh_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @smulh_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smulh_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%1 = sext <32 x i32> %op1 to <32 x i64>
%2 = sext <32 x i32> %op2 to <32 x i64>
%mul = mul <32 x i64> %1, %2
%shr = lshr <32 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%res = trunc <32 x i64> %shr to <32 x i32>
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @smulh_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @smulh_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smulh_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%1 = sext <64 x i32> %op1 to <64 x i64>
%2 = sext <64 x i32> %op2 to <64 x i64>
%mul = mul <64 x i64> %1, %2
%shr = lshr <64 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%res = trunc <64 x i64> %shr to <64 x i32>
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @smulh_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @smulh_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: smulh_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%1 = sext <4 x i64> %op1 to <4 x i128>
%2 = sext <4 x i64> %op2 to <4 x i128>
%mul = mul <4 x i128> %1, %2
%shr = lshr <4 x i128> %mul, <i128 64, i128 64, i128 64, i128 64>
%res = trunc <4 x i128> %shr to <4 x i64>
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @smulh_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @smulh_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: smulh_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%1 = sext <8 x i64> %op1 to <8 x i128>
%2 = sext <8 x i64> %op2 to <8 x i128>
%mul = mul <8 x i128> %1, %2
%shr = lshr <8 x i128> %mul, <i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64>
%res = trunc <8 x i128> %shr to <8 x i64>
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @smulh_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @smulh_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: smulh_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%1 = sext <16 x i64> %op1 to <16 x i128>
%2 = sext <16 x i64> %op2 to <16 x i128>
%mul = mul <16 x i128> %1, %2
%shr = lshr <16 x i128> %mul, <i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64>
%res = trunc <16 x i128> %shr to <16 x i64>
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @smulh_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @smulh_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: smulh_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%1 = sext <32 x i64> %op1 to <32 x i128>
%2 = sext <32 x i64> %op2 to <32 x i128>
%mul = mul <32 x i128> %1, %2
%shr = lshr <32 x i128> %mul, <i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64>
%res = trunc <32 x i128> %shr to <32 x i64>
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
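;
; UMULH: same pattern as SMULH but with zero-extended operands, expected to
; lower to a single predicated umulh instruction.
;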
-define void @umulh_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @umulh_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umulh_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%1 = zext <32 x i8> %op1 to <32 x i16>
%2 = zext <32 x i8> %op2 to <32 x i16>
%mul = mul <32 x i16> %1, %2
%shr = lshr <32 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%res = trunc <32 x i16> %shr to <32 x i8>
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @umulh_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @umulh_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umulh_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%1 = zext <64 x i8> %op1 to <64 x i16>
%2 = zext <64 x i8> %op2 to <64 x i16>
%mul = mul <64 x i16> %1, %2
%shr = lshr <64 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%res = trunc <64 x i16> %shr to <64 x i8>
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @umulh_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @umulh_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umulh_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%1 = zext <128 x i8> %op1 to <128 x i16>
%2 = zext <128 x i8> %op2 to <128 x i16>
%mul = mul <128 x i16> %1, %2
%shr = lshr <128 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%res = trunc <128 x i16> %shr to <128 x i8>
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @umulh_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @umulh_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umulh_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%1 = zext <256 x i8> %op1 to <256 x i16>
%2 = zext <256 x i8> %op2 to <256 x i16>
%mul = mul <256 x i16> %1, %2
%shr = lshr <256 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%res = trunc <256 x i16> %shr to <256 x i8>
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @umulh_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @umulh_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umulh_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%1 = zext <16 x i16> %op1 to <16 x i32>
%2 = zext <16 x i16> %op2 to <16 x i32>
%mul = mul <16 x i32> %1, %2
%shr = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%res = trunc <16 x i32> %shr to <16 x i16>
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @umulh_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @umulh_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umulh_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%1 = zext <32 x i16> %op1 to <32 x i32>
%2 = zext <32 x i16> %op2 to <32 x i32>
%mul = mul <32 x i32> %1, %2
%shr = lshr <32 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%res = trunc <32 x i32> %shr to <32 x i16>
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @umulh_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @umulh_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umulh_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%1 = zext <64 x i16> %op1 to <64 x i32>
%2 = zext <64 x i16> %op2 to <64 x i32>
%mul = mul <64 x i32> %1, %2
%shr = lshr <64 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%res = trunc <64 x i32> %shr to <64 x i16>
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @umulh_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @umulh_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umulh_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%1 = zext <128 x i16> %op1 to <128 x i32>
%2 = zext <128 x i16> %op2 to <128 x i32>
%mul = mul <128 x i32> %1, %2
%shr = lshr <128 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%res = trunc <128 x i32> %shr to <128 x i16>
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @umulh_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @umulh_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umulh_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%1 = zext <8 x i32> %op1 to <8 x i64>
%2 = zext <8 x i32> %op2 to <8 x i64>
%mul = mul <8 x i64> %1, %2
%shr = lshr <8 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%res = trunc <8 x i64> %shr to <8 x i32>
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @umulh_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @umulh_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umulh_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: umulh z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%1 = zext <16 x i32> %op1 to <16 x i64>
%2 = zext <16 x i32> %op2 to <16 x i64>
%mul = mul <16 x i64> %1, %2
%shr = lshr <16 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%res = trunc <16 x i64> %shr to <16 x i32>
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @umulh_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @umulh_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umulh_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%1 = zext <32 x i32> %op1 to <32 x i64>
%2 = zext <32 x i32> %op2 to <32 x i64>
%mul = mul <32 x i64> %1, %2
%shr = lshr <32 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%res = trunc <32 x i64> %shr to <32 x i32>
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @umulh_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @umulh_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umulh_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%1 = zext <64 x i32> %op1 to <64 x i64>
%2 = zext <64 x i32> %op2 to <64 x i64>
%mul = mul <64 x i64> %1, %2
%shr = lshr <64 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%res = trunc <64 x i64> %shr to <64 x i32>
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @umulh_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @umulh_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: umulh_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%1 = zext <4 x i64> %op1 to <4 x i128>
%2 = zext <4 x i64> %op2 to <4 x i128>
%mul = mul <4 x i128> %1, %2
%shr = lshr <4 x i128> %mul, <i128 64, i128 64, i128 64, i128 64>
%res = trunc <4 x i128> %shr to <4 x i64>
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @umulh_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @umulh_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: umulh_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: umulh z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%1 = zext <8 x i64> %op1 to <8 x i128>
%2 = zext <8 x i64> %op2 to <8 x i128>
%mul = mul <8 x i128> %1, %2
%shr = lshr <8 x i128> %mul, <i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64>
%res = trunc <8 x i128> %shr to <8 x i64>
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @umulh_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @umulh_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: umulh_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%1 = zext <16 x i64> %op1 to <16 x i128>
%2 = zext <16 x i64> %op2 to <16 x i128>
%mul = mul <16 x i128> %1, %2
%shr = lshr <16 x i128> %mul, <i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64>
%res = trunc <16 x i128> %shr to <16 x i64>
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @umulh_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @umulh_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: umulh_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%1 = zext <32 x i64> %op1 to <32 x i128>
%2 = zext <32 x i64> %op2 to <32 x i128>
%mul = mul <32 x i128> %1, %2
%shr = lshr <32 x i128> %mul, <i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64, i128 64>
%res = trunc <32 x i128> %shr to <32 x i64>
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
attributes #0 = { "target-features"="+sve" }
ret i8 %res
}
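;
; UADDV: unsigned add reduction across the whole vector; for SVE the scalar
; result is produced in d0 by the uaddv reduction and moved to a general
; purpose register.
;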
-define i8 @uaddv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define i8 @uaddv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: uaddv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op)
ret i8 %res
}
-define i8 @uaddv_v64i8(<64 x i8>* %a) #0 {
+define i8 @uaddv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: uaddv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: // kill: def $w0 killed $w0 killed $x0
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %op)
ret i8 %res
}
-define i8 @uaddv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define i8 @uaddv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: uaddv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %op)
ret i8 %res
}
-define i8 @uaddv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define i8 @uaddv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: uaddv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> %op)
ret i8 %res
}
ret i16 %res
}
-define i16 @uaddv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define i16 @uaddv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: uaddv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op)
ret i16 %res
}
-define i16 @uaddv_v32i16(<32 x i16>* %a) #0 {
+define i16 @uaddv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: uaddv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: // kill: def $w0 killed $w0 killed $x0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %op)
ret i16 %res
}
-define i16 @uaddv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define i16 @uaddv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: uaddv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> %op)
ret i16 %res
}
-define i16 @uaddv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define i16 @uaddv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: uaddv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> %op)
ret i16 %res
}
ret i32 %res
}
-define i32 @uaddv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define i32 @uaddv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: uaddv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op)
ret i32 %res
}
-define i32 @uaddv_v16i32(<16 x i32>* %a) #0 {
+define i32 @uaddv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: uaddv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: // kill: def $w0 killed $w0 killed $x0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %op)
ret i32 %res
}
-define i32 @uaddv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define i32 @uaddv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: uaddv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %op)
ret i32 %res
}
-define i32 @uaddv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define i32 @uaddv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: uaddv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %op)
ret i32 %res
}
ret i64 %res
}
-define i64 @uaddv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define i64 @uaddv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: uaddv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uaddv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %op)
ret i64 %res
}
-define i64 @uaddv_v8i64(<8 x i64>* %a) #0 {
+define i64 @uaddv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: uaddv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uaddv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %op)
ret i64 %res
}
-define i64 @uaddv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define i64 @uaddv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: uaddv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: uaddv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %op)
ret i64 %res
}
-define i64 @uaddv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define i64 @uaddv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: uaddv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: uaddv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> %op)
ret i64 %res
}
ret i8 %res
}
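;
; SMAXV: signed maximum reduction, lowered to the predicated smaxv reduction
; with the scalar result read out of the low element.
;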
-define i8 @smaxv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define i8 @smaxv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: smaxv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: smaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %op)
ret i8 %res
}
-define i8 @smaxv_v64i8(<64 x i8>* %a) #0 {
+define i8 @smaxv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: smaxv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: smaxv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> %op)
ret i8 %res
}
-define i8 @smaxv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define i8 @smaxv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: smaxv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: smaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> %op)
ret i8 %res
}
-define i8 @smaxv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define i8 @smaxv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: smaxv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: smaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> %op)
ret i8 %res
}
ret i16 %res
}
-define i16 @smaxv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define i16 @smaxv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: smaxv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: smaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %op)
ret i16 %res
}
-define i16 @smaxv_v32i16(<32 x i16>* %a) #0 {
+define i16 @smaxv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: smaxv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: smaxv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> %op)
ret i16 %res
}
-define i16 @smaxv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define i16 @smaxv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: smaxv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: smaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> %op)
ret i16 %res
}
-define i16 @smaxv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define i16 @smaxv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: smaxv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: smaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> %op)
ret i16 %res
}
ret i32 %res
}
-define i32 @smaxv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define i32 @smaxv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: smaxv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: smaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %op)
ret i32 %res
}
-define i32 @smaxv_v16i32(<16 x i32>* %a) #0 {
+define i32 @smaxv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: smaxv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: smaxv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %op)
ret i32 %res
}
-define i32 @smaxv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define i32 @smaxv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: smaxv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: smaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> %op)
ret i32 %res
}
-define i32 @smaxv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define i32 @smaxv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: smaxv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: smaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> %op)
ret i32 %res
}
ret i64 %res
}
-define i64 @smaxv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define i64 @smaxv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: smaxv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: smaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %op)
ret i64 %res
}
-define i64 @smaxv_v8i64(<8 x i64>* %a) #0 {
+define i64 @smaxv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: smaxv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: smaxv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %op)
ret i64 %res
}
-define i64 @smaxv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define i64 @smaxv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: smaxv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: smaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> %op)
ret i64 %res
}
-define i64 @smaxv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define i64 @smaxv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: smaxv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: smaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> %op)
ret i64 %res
}
ret i8 %res
}
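;
; SMINV: signed minimum reduction, the smin counterpart of the SMAXV tests
; above.
;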
-define i8 @sminv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define i8 @sminv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sminv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: sminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %op)
ret i8 %res
}
-define i8 @sminv_v64i8(<64 x i8>* %a) #0 {
+define i8 @sminv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: sminv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: sminv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> %op)
ret i8 %res
}
-define i8 @sminv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define i8 @sminv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sminv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: sminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> %op)
ret i8 %res
}
-define i8 @sminv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define i8 @sminv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sminv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: sminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smin.v256i8(<256 x i8> %op)
ret i8 %res
}
ret i16 %res
}
-define i16 @sminv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define i16 @sminv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sminv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: sminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %op)
ret i16 %res
}
-define i16 @sminv_v32i16(<32 x i16>* %a) #0 {
+define i16 @sminv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: sminv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: sminv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> %op)
ret i16 %res
}
-define i16 @sminv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define i16 @sminv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sminv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: sminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> %op)
ret i16 %res
}
-define i16 @sminv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define i16 @sminv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sminv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: sminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> %op)
ret i16 %res
}
ret i32 %res
}
-define i32 @sminv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define i32 @sminv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sminv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %op)
ret i32 %res
}
-define i32 @sminv_v16i32(<16 x i32>* %a) #0 {
+define i32 @sminv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: sminv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: sminv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %op)
ret i32 %res
}
-define i32 @sminv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define i32 @sminv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sminv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> %op)
ret i32 %res
}
-define i32 @sminv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define i32 @sminv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sminv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> %op)
ret i32 %res
}
ret i64 %res
}
-define i64 @sminv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define i64 @sminv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sminv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: sminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %op)
ret i64 %res
}
-define i64 @sminv_v8i64(<8 x i64>* %a) #0 {
+define i64 @sminv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: sminv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: sminv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %op)
ret i64 %res
}
-define i64 @sminv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define i64 @sminv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sminv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: sminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> %op)
ret i64 %res
}
-define i64 @sminv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define i64 @sminv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sminv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: sminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> %op)
ret i64 %res
}
ret i8 %res
}
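;
; UMAXV
;
; Unsigned maximum reductions, checked the same way: one predicated UMAXV
; per vector width, then an fmov to return the scalar.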
-define i8 @umaxv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define i8 @umaxv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: umaxv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %op)
ret i8 %res
}
-define i8 @umaxv_v64i8(<64 x i8>* %a) #0 {
+define i8 @umaxv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: umaxv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: umaxv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> %op)
ret i8 %res
}
-define i8 @umaxv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define i8 @umaxv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: umaxv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> %op)
ret i8 %res
}
-define i8 @umaxv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define i8 @umaxv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: umaxv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umax.v256i8(<256 x i8> %op)
ret i8 %res
}
ret i16 %res
}
-define i16 @umaxv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define i16 @umaxv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: umaxv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %op)
ret i16 %res
}
-define i16 @umaxv_v32i16(<32 x i16>* %a) #0 {
+define i16 @umaxv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: umaxv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: umaxv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> %op)
ret i16 %res
}
-define i16 @umaxv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define i16 @umaxv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: umaxv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> %op)
ret i16 %res
}
-define i16 @umaxv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define i16 @umaxv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: umaxv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> %op)
ret i16 %res
}
ret i32 %res
}
-define i32 @umaxv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define i32 @umaxv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: umaxv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %op)
ret i32 %res
}
-define i32 @umaxv_v16i32(<16 x i32>* %a) #0 {
+define i32 @umaxv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: umaxv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: umaxv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %op)
ret i32 %res
}
-define i32 @umaxv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define i32 @umaxv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: umaxv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> %op)
ret i32 %res
}
-define i32 @umaxv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define i32 @umaxv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: umaxv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> %op)
ret i32 %res
}
ret i64 %res
}
-define i64 @umaxv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define i64 @umaxv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: umaxv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: umaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %op)
ret i64 %res
}
-define i64 @umaxv_v8i64(<8 x i64>* %a) #0 {
+define i64 @umaxv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: umaxv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: umaxv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %op)
ret i64 %res
}
-define i64 @umaxv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define i64 @umaxv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: umaxv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: umaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> %op)
ret i64 %res
}
-define i64 @umaxv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define i64 @umaxv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: umaxv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: umaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> %op)
ret i64 %res
}
ret i8 %res
}
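;
; UMINV
;
; Unsigned minimum reductions; the expected codegen mirrors the signed
; variants above, using UMINV.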
-define i8 @uminv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define i8 @uminv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: uminv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %op)
ret i8 %res
}
-define i8 @uminv_v64i8(<64 x i8>* %a) #0 {
+define i8 @uminv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: uminv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: uminv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> %op)
ret i8 %res
}
-define i8 @uminv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define i8 @uminv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: uminv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> %op)
ret i8 %res
}
-define i8 @uminv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define i8 @uminv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: uminv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umin.v256i8(<256 x i8> %op)
ret i8 %res
}
ret i16 %res
}
-define i16 @uminv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define i16 @uminv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: uminv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %op)
ret i16 %res
}
-define i16 @uminv_v32i16(<32 x i16>* %a) #0 {
+define i16 @uminv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: uminv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: uminv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> %op)
ret i16 %res
}
-define i16 @uminv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define i16 @uminv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: uminv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> %op)
ret i16 %res
}
-define i16 @uminv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define i16 @uminv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: uminv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> %op)
ret i16 %res
}
ret i32 %res
}
-define i32 @uminv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define i32 @uminv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: uminv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %op)
ret i32 %res
}
-define i32 @uminv_v16i32(<16 x i32>* %a) #0 {
+define i32 @uminv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: uminv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: uminv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %op)
ret i32 %res
}
-define i32 @uminv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define i32 @uminv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: uminv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> %op)
ret i32 %res
}
-define i32 @uminv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define i32 @uminv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: uminv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> %op)
ret i32 %res
}
ret i64 %res
}
-define i64 @uminv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define i64 @uminv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: uminv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %op)
ret i64 %res
}
-define i64 @uminv_v8i64(<8 x i64>* %a) #0 {
+define i64 @uminv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: uminv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uminv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %op)
ret i64 %res
}
-define i64 @uminv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define i64 @uminv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: uminv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> %op)
ret i64 %res
}
-define i64 @uminv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define i64 @uminv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: uminv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> %op)
ret i64 %res
}
ret <16 x i8> %res
}
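;
; SREM
;
; SVE has no integer remainder instruction, so srem is expanded to a
; divide followed by a multiply-subtract (a - (a / b) * b); the checks
; assert the folded MLS and the final store.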
-define void @srem_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(8,0) #0 {
+define void @srem_v32i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: srem_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = srem <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @srem_v64i8(<64 x i8>* %a, <64 x i8>* %b) vscale_range(16,0) #0 {
+define void @srem_v64i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: srem_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = srem <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @srem_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(16,0) #0 {
+define void @srem_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: srem_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = srem <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @srem_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @srem_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: srem_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = srem <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @srem_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
+define void @srem_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: srem_v16i16:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q2, q0, [x0]
; VBITS_GE_512-NEXT: mls z0.h, p0/m, z2.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = srem <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @srem_v32i16(<32 x i16>* %a, <32 x i16>* %b) vscale_range(8,0) #0 {
+define void @srem_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: srem_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = srem <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @srem_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(16,0) #0 {
+define void @srem_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: srem_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = srem <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @srem_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @srem_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: srem_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = srem <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @srem_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @srem_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: srem_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = srem <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @srem_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @srem_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: srem_v16i32:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
; VBITS_GE_512-NEXT: mls z0.s, p0/m, z2.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = srem <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @srem_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @srem_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: srem_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = srem <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @srem_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @srem_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: srem_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = srem <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @srem_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @srem_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: srem_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = srem <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @srem_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @srem_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: srem_v8i64:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q4, q5, [x1]
; VBITS_GE_512-NEXT: mls z0.d, p0/m, z2.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = srem <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @srem_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @srem_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: srem_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = srem <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @srem_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @srem_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: srem_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = srem <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
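;
; UREM
;
; Unsigned remainder, expanded the same way as srem but with udiv feeding
; the multiply-subtract.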
-define void @urem_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(8,0) #0 {
+define void @urem_v32i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: urem_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = urem <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @urem_v64i8(<64 x i8>* %a, <64 x i8>* %b) vscale_range(16,0) #0 {
+define void @urem_v64i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: urem_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = urem <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @urem_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(16,0) #0 {
+define void @urem_v128i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: urem_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = urem <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @urem_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @urem_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: urem_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = urem <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @urem_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
+define void @urem_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: urem_v16i16:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q2, q0, [x0]
; VBITS_GE_512-NEXT: mls z0.h, p0/m, z2.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = urem <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @urem_v32i16(<32 x i16>* %a, <32 x i16>* %b) vscale_range(8,0) #0 {
+define void @urem_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: urem_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = urem <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @urem_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(16,0) #0 {
+define void @urem_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: urem_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = urem <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @urem_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @urem_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: urem_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = urem <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @urem_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @urem_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: urem_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = urem <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @urem_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @urem_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: urem_v16i32:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
; VBITS_GE_512-NEXT: mls z0.s, p0/m, z2.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = urem <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @urem_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @urem_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: urem_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = urem <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @urem_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @urem_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: urem_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = urem <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @urem_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @urem_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: urem_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = urem <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @urem_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @urem_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: urem_v8i64:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q4, q5, [x1]
; VBITS_GE_512-NEXT: mls z0.d, p0/m, z2.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = urem <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @urem_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @urem_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: urem_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = urem <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @urem_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @urem_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: urem_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = urem <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %sel
}
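;
; SELECT (i1 condition)
;
; The scalar i1 mask arrives in w2 and is masked to bit 0 before being
; broadcast into a predicate for SEL. The loads are volatile, presumably
; so they cannot be folded away and both operands stay live.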
-define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b, i1 %mask) vscale_range(2,0) #0 {
+define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <32 x i8>, <32 x i8>* %a
- %op2 = load volatile <32 x i8>, <32 x i8>* %b
+ %op1 = load volatile <32 x i8>, ptr %a
+ %op2 = load volatile <32 x i8>, ptr %b
%sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2
- store <32 x i8> %sel, <32 x i8>* %a
+ store <32 x i8> %sel, ptr %a
ret void
}
-define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b, i1 %mask) #0 {
+define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load volatile <64 x i8>, <64 x i8>* %a
- %op2 = load volatile <64 x i8>, <64 x i8>* %b
+ %op1 = load volatile <64 x i8>, ptr %a
+ %op2 = load volatile <64 x i8>, ptr %b
%sel = select i1 %mask, <64 x i8> %op1, <64 x i8> %op2
- store <64 x i8> %sel, <64 x i8>* %a
+ store <64 x i8> %sel, ptr %a
ret void
}
-define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b, i1 %mask) vscale_range(8,0) #0 {
+define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <128 x i8>, <128 x i8>* %a
- %op2 = load volatile <128 x i8>, <128 x i8>* %b
+ %op1 = load volatile <128 x i8>, ptr %a
+ %op2 = load volatile <128 x i8>, ptr %b
%sel = select i1 %mask, <128 x i8> %op1, <128 x i8> %op2
- store <128 x i8> %sel, <128 x i8>* %a
+ store <128 x i8> %sel, ptr %a
ret void
}
-define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b, i1 %mask) vscale_range(16,0) #0 {
+define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <256 x i8>, <256 x i8>* %a
- %op2 = load volatile <256 x i8>, <256 x i8>* %b
+ %op1 = load volatile <256 x i8>, ptr %a
+ %op2 = load volatile <256 x i8>, ptr %b
%sel = select i1 %mask, <256 x i8> %op1, <256 x i8> %op2
- store <256 x i8> %sel, <256 x i8>* %a
+ store <256 x i8> %sel, ptr %a
ret void
}
ret <8 x i16> %sel
}
-define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b, i1 %mask) vscale_range(2,0) #0 {
+define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <16 x i16>, <16 x i16>* %a
- %op2 = load volatile <16 x i16>, <16 x i16>* %b
+ %op1 = load volatile <16 x i16>, ptr %a
+ %op2 = load volatile <16 x i16>, ptr %b
%sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2
- store <16 x i16> %sel, <16 x i16>* %a
+ store <16 x i16> %sel, ptr %a
ret void
}
-define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b, i1 %mask) #0 {
+define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load volatile <32 x i16>, <32 x i16>* %a
- %op2 = load volatile <32 x i16>, <32 x i16>* %b
+ %op1 = load volatile <32 x i16>, ptr %a
+ %op2 = load volatile <32 x i16>, ptr %b
%sel = select i1 %mask, <32 x i16> %op1, <32 x i16> %op2
- store <32 x i16> %sel, <32 x i16>* %a
+ store <32 x i16> %sel, ptr %a
ret void
}
-define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b, i1 %mask) vscale_range(8,0) #0 {
+define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <64 x i16>, <64 x i16>* %a
- %op2 = load volatile <64 x i16>, <64 x i16>* %b
+ %op1 = load volatile <64 x i16>, ptr %a
+ %op2 = load volatile <64 x i16>, ptr %b
%sel = select i1 %mask, <64 x i16> %op1, <64 x i16> %op2
- store <64 x i16> %sel, <64 x i16>* %a
+ store <64 x i16> %sel, ptr %a
ret void
}
-define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b, i1 %mask) vscale_range(16,0) #0 {
+define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <128 x i16>, <128 x i16>* %a
- %op2 = load volatile <128 x i16>, <128 x i16>* %b
+ %op1 = load volatile <128 x i16>, ptr %a
+ %op2 = load volatile <128 x i16>, ptr %b
%sel = select i1 %mask, <128 x i16> %op1, <128 x i16> %op2
- store <128 x i16> %sel, <128 x i16>* %a
+ store <128 x i16> %sel, ptr %a
ret void
}
ret <4 x i32> %sel
}
-define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b, i1 %mask) vscale_range(2,0) #0 {
+define void @select_v8i32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <8 x i32>, <8 x i32>* %a
- %op2 = load volatile <8 x i32>, <8 x i32>* %b
+ %op1 = load volatile <8 x i32>, ptr %a
+ %op2 = load volatile <8 x i32>, ptr %b
%sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2
- store <8 x i32> %sel, <8 x i32>* %a
+ store <8 x i32> %sel, ptr %a
ret void
}
-define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b, i1 %mask) #0 {
+define void @select_v16i32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load volatile <16 x i32>, <16 x i32>* %a
- %op2 = load volatile <16 x i32>, <16 x i32>* %b
+ %op1 = load volatile <16 x i32>, ptr %a
+ %op2 = load volatile <16 x i32>, ptr %b
%sel = select i1 %mask, <16 x i32> %op1, <16 x i32> %op2
- store <16 x i32> %sel, <16 x i32>* %a
+ store <16 x i32> %sel, ptr %a
ret void
}
-define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b, i1 %mask) vscale_range(8,0) #0 {
+define void @select_v32i32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <32 x i32>, <32 x i32>* %a
- %op2 = load volatile <32 x i32>, <32 x i32>* %b
+ %op1 = load volatile <32 x i32>, ptr %a
+ %op2 = load volatile <32 x i32>, ptr %b
%sel = select i1 %mask, <32 x i32> %op1, <32 x i32> %op2
- store <32 x i32> %sel, <32 x i32>* %a
+ store <32 x i32> %sel, ptr %a
ret void
}
-define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b, i1 %mask) vscale_range(16,0) #0 {
+define void @select_v64i32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <64 x i32>, <64 x i32>* %a
- %op2 = load volatile <64 x i32>, <64 x i32>* %b
+ %op1 = load volatile <64 x i32>, ptr %a
+ %op2 = load volatile <64 x i32>, ptr %b
%sel = select i1 %mask, <64 x i32> %op1, <64 x i32> %op2
- store <64 x i32> %sel, <64 x i32>* %a
+ store <64 x i32> %sel, ptr %a
ret void
}
ret <2 x i64> %sel
}
-define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b, i1 %mask) vscale_range(2,0) #0 {
+define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <4 x i64>, <4 x i64>* %a
- %op2 = load volatile <4 x i64>, <4 x i64>* %b
+ %op1 = load volatile <4 x i64>, ptr %a
+ %op2 = load volatile <4 x i64>, ptr %b
%sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2
- store <4 x i64> %sel, <4 x i64>* %a
+ store <4 x i64> %sel, ptr %a
ret void
}
-define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b, i1 %mask) #0 {
+define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load volatile <8 x i64>, <8 x i64>* %a
- %op2 = load volatile <8 x i64>, <8 x i64>* %b
+ %op1 = load volatile <8 x i64>, ptr %a
+ %op2 = load volatile <8 x i64>, ptr %b
%sel = select i1 %mask, <8 x i64> %op1, <8 x i64> %op2
- store <8 x i64> %sel, <8 x i64>* %a
+ store <8 x i64> %sel, ptr %a
ret void
}
-define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b, i1 %mask) vscale_range(8,0) #0 {
+define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <16 x i64>, <16 x i64>* %a
- %op2 = load volatile <16 x i64>, <16 x i64>* %b
+ %op1 = load volatile <16 x i64>, ptr %a
+ %op2 = load volatile <16 x i64>, ptr %b
%sel = select i1 %mask, <16 x i64> %op1, <16 x i64> %op2
- store <16 x i64> %sel, <16 x i64>* %a
+ store <16 x i64> %sel, ptr %a
ret void
}
-define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b, i1 %mask) vscale_range(16,0) #0 {
+define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load volatile <32 x i64>, <32 x i64>* %a
- %op2 = load volatile <32 x i64>, <32 x i64>* %b
+ %op1 = load volatile <32 x i64>, ptr %a
+ %op2 = load volatile <32 x i64>, ptr %b
%sel = select i1 %mask, <32 x i64> %op1, <32 x i64> %op2
- store <32 x i64> %sel, <32 x i64>* %a
+ store <32 x i64> %sel, ptr %a
ret void
}
ret <16 x i8> %res
}
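;
; ASHR
;
; Arithmetic shift right with a per-element shift amount, expected to
; select the merging predicated form (e.g. asr z0.b, p0/m, z0.b, z1.b).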
-define void @ashr_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @ashr_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ashr_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = ashr <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @ashr_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @ashr_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ashr_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: asr z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = ashr <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @ashr_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @ashr_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ashr_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = ashr <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @ashr_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @ashr_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ashr_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = ashr <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @ashr_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @ashr_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ashr_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = ashr <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @ashr_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @ashr_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ashr_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: asr z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = ashr <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @ashr_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @ashr_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ashr_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = ashr <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @ashr_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @ashr_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ashr_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = ashr <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @ashr_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @ashr_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ashr_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = ashr <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @ashr_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @ashr_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ashr_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: asr z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = ashr <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @ashr_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @ashr_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ashr_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = ashr <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @ashr_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @ashr_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ashr_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = ashr <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @ashr_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @ashr_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ashr_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = ashr <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @ashr_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @ashr_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ashr_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: asr z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = ashr <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @ashr_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @ashr_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ashr_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = ashr <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @ashr_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @ashr_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ashr_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = ashr <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @lshr_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @lshr_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: lshr_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = lshr <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @lshr_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @lshr_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: lshr_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: lsr z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = lshr <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @lshr_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @lshr_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: lshr_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = lshr <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @lshr_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @lshr_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: lshr_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = lshr <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @lshr_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @lshr_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: lshr_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = lshr <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @lshr_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @lshr_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: lshr_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = lshr <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @lshr_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @lshr_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: lshr_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = lshr <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @lshr_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @lshr_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: lshr_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = lshr <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @lshr_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @lshr_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: lshr_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = lshr <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @lshr_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @lshr_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: lshr_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = lshr <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @lshr_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @lshr_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: lshr_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = lshr <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @lshr_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @lshr_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: lshr_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = lshr <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @lshr_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @lshr_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: lshr_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = lshr <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @lshr_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @lshr_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: lshr_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: lsr z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = lshr <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @lshr_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @lshr_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: lshr_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = lshr <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @lshr_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @lshr_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: lshr_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = lshr <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
-define void @shl_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @shl_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shl_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%res = shl <32 x i8> %op1, %op2
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @shl_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @shl_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shl_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: lsl z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = shl <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @shl_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @shl_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shl_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%res = shl <128 x i8> %op1, %op2
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @shl_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @shl_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shl_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%res = shl <256 x i8> %op1, %op2
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @shl_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @shl_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shl_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%res = shl <16 x i16> %op1, %op2
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @shl_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @shl_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shl_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = shl <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @shl_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @shl_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shl_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%res = shl <64 x i16> %op1, %op2
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @shl_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @shl_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shl_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%res = shl <128 x i16> %op1, %op2
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @shl_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @shl_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shl_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%res = shl <8 x i32> %op1, %op2
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @shl_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @shl_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shl_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%res = shl <16 x i32> %op1, %op2
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @shl_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @shl_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shl_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%res = shl <32 x i32> %op1, %op2
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @shl_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @shl_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shl_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%res = shl <64 x i32> %op1, %op2
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @shl_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @shl_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shl_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%res = shl <4 x i64> %op1, %op2
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @shl_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @shl_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shl_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: lsl z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%res = shl <8 x i64> %op1, %op2
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @shl_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @shl_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shl_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%res = shl <16 x i64> %op1, %op2
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @shl_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @shl_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shl_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%res = shl <32 x i64> %op1, %op2
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
}
; Don't use SVE for 128-bit vectors.
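; (A v8i16 -> v8f16 conversion fits entirely in one 128-bit q register,
; so a plain NEON ucvtf between a ldr/str pair is emitted and no SVE
; predicate setup is needed, as the checks below show.)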
-define void @ucvtf_v8i16_v8f16(<8 x i16>* %a, <8 x half>* %b) vscale_range(2,0) #0 {
+define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i16_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ucvtf v0.8h, v0.8h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x i16>, <8 x i16>* %a
+ %op1 = load <8 x i16>, ptr %a
%res = uitofp <8 x i16> %op1 to <8 x half>
- store <8 x half> %res, <8 x half>* %b
+ store <8 x half> %res, ptr %b
ret void
}
-define void @ucvtf_v16i16_v16f16(<16 x i16>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v16i16_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
+ %op1 = load <16 x i16>, ptr %a
%res = uitofp <16 x i16> %op1 to <16 x half>
- store <16 x half> %res, <16 x half>* %b
+ store <16 x half> %res, ptr %b
ret void
}
-define void @ucvtf_v32i16_v32f16(<32 x i16>* %a, <32 x half>* %b) #0 {
+define void @ucvtf_v32i16_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v32i16_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: ucvtf z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%res = uitofp <32 x i16> %op1 to <32 x half>
- store <32 x half> %res, <32 x half>* %b
+ store <32 x half> %res, ptr %b
ret void
}
-define void @ucvtf_v64i16_v64f16(<64 x i16>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @ucvtf_v64i16_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v64i16_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
+ %op1 = load <64 x i16>, ptr %a
%res = uitofp <64 x i16> %op1 to <64 x half>
- store <64 x half> %res, <64 x half>* %b
+ store <64 x half> %res, ptr %b
ret void
}
-define void @ucvtf_v128i16_v128f16(<128 x i16>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @ucvtf_v128i16_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v128i16_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
+ %op1 = load <128 x i16>, ptr %a
%res = uitofp <128 x i16> %op1 to <128 x half>
- store <128 x half> %res, <128 x half>* %b
+ store <128 x half> %res, ptr %b
ret void
}
ret <4 x float> %res
}
-define void @ucvtf_v8i16_v8f32(<8 x i16>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i16_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x i16>, <8 x i16>* %a
+ %op1 = load <8 x i16>, ptr %a
%res = uitofp <8 x i16> %op1 to <8 x float>
- store <8 x float> %res, <8 x float>* %b
+ store <8 x float> %res, ptr %b
ret void
}
-define void @ucvtf_v16i16_v16f32(<16 x i16>* %a, <16 x float>* %b) #0 {
+define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v16i16_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: ucvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
+ %op1 = load <16 x i16>, ptr %a
%res = uitofp <16 x i16> %op1 to <16 x float>
- store <16 x float> %res, <16 x float>* %b
+ store <16 x float> %res, ptr %b
ret void
}
-define void @ucvtf_v32i16_v32f32(<32 x i16>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @ucvtf_v32i16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v32i16_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%res = uitofp <32 x i16> %op1 to <32 x float>
- store <32 x float> %res, <32 x float>* %b
+ store <32 x float> %res, ptr %b
ret void
}
-define void @ucvtf_v64i16_v64f32(<64 x i16>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @ucvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v64i16_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
+ %op1 = load <64 x i16>, ptr %a
%res = uitofp <64 x i16> %op1 to <64 x float>
- store <64 x float> %res, <64 x float>* %b
+ store <64 x float> %res, ptr %b
ret void
}
ret <2 x double> %res
}
-define void @ucvtf_v4i16_v4f64(<4 x i16>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i16_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x i16>, <4 x i16>* %a
+ %op1 = load <4 x i16>, ptr %a
%res = uitofp <4 x i16> %op1 to <4 x double>
- store <4 x double> %res, <4 x double>* %b
+ store <4 x double> %res, ptr %b
ret void
}
-define void @ucvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
+define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i16_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: ucvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i16>, <8 x i16>* %a
+ %op1 = load <8 x i16>, ptr %a
%res = uitofp <8 x i16> %op1 to <8 x double>
- store <8 x double> %res, <8 x double>* %b
+ store <8 x double> %res, ptr %b
ret void
}
-define void @ucvtf_v16i16_v16f64(<16 x i16>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i16_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
+ %op1 = load <16 x i16>, ptr %a
%res = uitofp <16 x i16> %op1 to <16 x double>
- store <16 x double> %res, <16 x double>* %b
+ store <16 x double> %res, ptr %b
ret void
}
-define void @ucvtf_v32i16_v32f64(<32 x i16>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @ucvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i16_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%res = uitofp <32 x i16> %op1 to <32 x double>
- store <32 x double> %res, <32 x double>* %b
+ store <32 x double> %res, ptr %b
ret void
}
ret <4 x half> %res
}
-define <8 x half> @ucvtf_v8i32_v8f16(<8 x i32>* %a) vscale_range(2,0) #0 {
+define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i32_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
+ %op1 = load <8 x i32>, ptr %a
%res = uitofp <8 x i32> %op1 to <8 x half>
ret <8 x half> %res
}
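; The `// kill:` annotation above is register-allocator bookkeeping
; rather than an instruction: q0 aliases the low 128 bits of z0, so the
; copy out of the SVE register is folded away and only noted in the
; assembly comment.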
-define void @ucvtf_v16i32_v16f16(<16 x i32>* %a, <16 x half>* %b) #0 {
+define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%res = uitofp <16 x i32> %op1 to <16 x half>
- store <16 x half> %res, <16 x half>* %b
+ store <16 x half> %res, ptr %b
ret void
}
-define void @ucvtf_v32i32_v32f16(<32 x i32>* %a, <32 x half>* %b) vscale_range(8,0) #0 {
+define void @ucvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v32i32_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
+ %op1 = load <32 x i32>, ptr %a
%res = uitofp <32 x i32> %op1 to <32 x half>
- store <32 x half> %res, <32 x half>* %b
+ store <32 x half> %res, ptr %b
ret void
}
-define void @ucvtf_v64i32_v64f16(<64 x i32>* %a, <64 x half>* %b) vscale_range(16,0) #0 {
+define void @ucvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v64i32_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
+ %op1 = load <64 x i32>, ptr %a
%res = uitofp <64 x i32> %op1 to <64 x half>
- store <64 x half> %res, <64 x half>* %b
+ store <64 x half> %res, ptr %b
ret void
}
ret <4 x float> %res
}
-define void @ucvtf_v8i32_v8f32(<8 x i32>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i32_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
+ %op1 = load <8 x i32>, ptr %a
%res = uitofp <8 x i32> %op1 to <8 x float>
- store <8 x float> %res, <8 x float>* %b
+ store <8 x float> %res, ptr %b
ret void
}
-define void @ucvtf_v16i32_v16f32(<16 x i32>* %a, <16 x float>* %b) #0 {
+define void @ucvtf_v16i32_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: ucvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%res = uitofp <16 x i32> %op1 to <16 x float>
- store <16 x float> %res, <16 x float>* %b
+ store <16 x float> %res, ptr %b
ret void
}
-define void @ucvtf_v32i32_v32f32(<32 x i32>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @ucvtf_v32i32_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v32i32_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
+ %op1 = load <32 x i32>, ptr %a
%res = uitofp <32 x i32> %op1 to <32 x float>
- store <32 x float> %res, <32 x float>* %b
+ store <32 x float> %res, ptr %b
ret void
}
-define void @ucvtf_v64i32_v64f32(<64 x i32>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @ucvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v64i32_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
+ %op1 = load <64 x i32>, ptr %a
%res = uitofp <64 x i32> %op1 to <64 x float>
- store <64 x float> %res, <64 x float>* %b
+ store <64 x float> %res, ptr %b
ret void
}
ret <2 x double> %res
}
-define void @ucvtf_v4i32_v4f64(<4 x i32>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i32_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x i32>, <4 x i32>* %a
+ %op1 = load <4 x i32>, ptr %a
%res = uitofp <4 x i32> %op1 to <4 x double>
- store <4 x double> %res, <4 x double>* %b
+ store <4 x double> %res, ptr %b
ret void
}
-define void @ucvtf_v8i32_v8f64(<8 x i32>* %a, <8 x double>* %b) #0 {
+define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i32_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: ucvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
+ %op1 = load <8 x i32>, ptr %a
%res = uitofp <8 x i32> %op1 to <8 x double>
- store <8 x double> %res, <8 x double>* %b
+ store <8 x double> %res, ptr %b
ret void
}
-define void @ucvtf_v16i32_v16f64(<16 x i32>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @ucvtf_v16i32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i32_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%res = uitofp <16 x i32> %op1 to <16 x double>
- store <16 x double> %res, <16 x double>* %b
+ store <16 x double> %res, ptr %b
ret void
}
-define void @ucvtf_v32i32_v32f64(<32 x i32>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @ucvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i32_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
+ %op1 = load <32 x i32>, ptr %a
%res = uitofp <32 x i32> %op1 to <32 x double>
- store <32 x double> %res, <32 x double>* %b
+ store <32 x double> %res, ptr %b
ret void
}
ret <2 x half> %res
}
-define <4 x half> @ucvtf_v4i64_v4f16(<4 x i64>* %a) vscale_range(2,0) #0 {
+define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i64_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
+ %op1 = load <4 x i64>, ptr %a
%res = uitofp <4 x i64> %op1 to <4 x half>
ret <4 x half> %res
}
-define <8 x half> @ucvtf_v8i64_v8f16(<8 x i64>* %a) #0 {
+define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%res = uitofp <8 x i64> %op1 to <8 x half>
ret <8 x half> %res
}
-define void @ucvtf_v16i64_v16f16(<16 x i64>* %a, <16 x half>* %b) vscale_range(8,0) #0 {
+define void @ucvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i64_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
+ %op1 = load <16 x i64>, ptr %a
%res = uitofp <16 x i64> %op1 to <16 x half>
- store <16 x half> %res, <16 x half>* %b
+ store <16 x half> %res, ptr %b
ret void
}
-define void @ucvtf_v32i64_v32f16(<32 x i64>* %a, <32 x half>* %b) vscale_range(16,0) #0 {
+define void @ucvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i64_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
+ %op1 = load <32 x i64>, ptr %a
%res = uitofp <32 x i64> %op1 to <32 x half>
- store <32 x half> %res, <32 x half>* %b
+ store <32 x half> %res, ptr %b
ret void
}
ret <2 x float> %res
}
-define <4 x float> @ucvtf_v4i64_v4f32(<4 x i64>* %a) vscale_range(2,0) #0 {
+define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i64_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
+ %op1 = load <4 x i64>, ptr %a
%res = uitofp <4 x i64> %op1 to <4 x float>
ret <4 x float> %res
}
-define void @ucvtf_v8i64_v8f32(<8 x i64>* %a, <8 x float>* %b) #0 {
+define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%res = uitofp <8 x i64> %op1 to <8 x float>
- store <8 x float> %res, <8 x float>* %b
+ store <8 x float> %res, ptr %b
ret void
}
-define void @ucvtf_v16i64_v16f32(<16 x i64>* %a, <16 x float>* %b) vscale_range(8,0) #0 {
+define void @ucvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i64_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
+ %op1 = load <16 x i64>, ptr %a
%res = uitofp <16 x i64> %op1 to <16 x float>
- store <16 x float> %res, <16 x float>* %b
+ store <16 x float> %res, ptr %b
ret void
}
-define void @ucvtf_v32i64_v32f32(<32 x i64>* %a, <32 x float>* %b) vscale_range(16,0) #0 {
+define void @ucvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i64_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
+ %op1 = load <32 x i64>, ptr %a
%res = uitofp <32 x i64> %op1 to <32 x float>
- store <32 x float> %res, <32 x float>* %b
+ store <32 x float> %res, ptr %b
ret void
}
ret <2 x double> %res
}
-define void @ucvtf_v4i64_v4f64(<4 x i64>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @ucvtf_v4i64_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i64_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
+ %op1 = load <4 x i64>, ptr %a
%res = uitofp <4 x i64> %op1 to <4 x double>
- store <4 x double> %res, <4 x double>* %b
+ store <4 x double> %res, ptr %b
ret void
}
-define void @ucvtf_v8i64_v8f64(<8 x i64>* %a, <8 x double>* %b) #0 {
+define void @ucvtf_v8i64_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ucvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%res = uitofp <8 x i64> %op1 to <8 x double>
- store <8 x double> %res, <8 x double>* %b
+ store <8 x double> %res, ptr %b
ret void
}
-define void @ucvtf_v16i64_v16f64(<16 x i64>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @ucvtf_v16i64_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i64_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
+ %op1 = load <16 x i64>, ptr %a
%res = uitofp <16 x i64> %op1 to <16 x double>
- store <16 x double> %res, <16 x double>* %b
+ store <16 x double> %res, ptr %b
ret void
}
-define void @ucvtf_v32i64_v32f64(<32 x i64>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @ucvtf_v32i64_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i64_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
+ %op1 = load <32 x i64>, ptr %a
%res = uitofp <32 x i64> %op1 to <32 x double>
- store <32 x double> %res, <32 x double>* %b
+ store <32 x double> %res, ptr %b
ret void
}
}
; Don't use SVE for 128-bit vectors.
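; (Same reasoning as the unsigned conversions above: an 8 x half value
; fits in one q register, so NEON scvtf is preferred over SVE here.)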
-define void @scvtf_v8i16_v8f16(<8 x i16>* %a, <8 x half>* %b) vscale_range(2,0) #0 {
+define void @scvtf_v8i16_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i16_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: scvtf v0.8h, v0.8h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x i16>, <8 x i16>* %a
+ %op1 = load <8 x i16>, ptr %a
%res = sitofp <8 x i16> %op1 to <8 x half>
- store <8 x half> %res, <8 x half>* %b
+ store <8 x half> %res, ptr %b
ret void
}
-define void @scvtf_v16i16_v16f16(<16 x i16>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @scvtf_v16i16_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v16i16_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
+ %op1 = load <16 x i16>, ptr %a
%res = sitofp <16 x i16> %op1 to <16 x half>
- store <16 x half> %res, <16 x half>* %b
+ store <16 x half> %res, ptr %b
ret void
}
-define void @scvtf_v32i16_v32f16(<32 x i16>* %a, <32 x half>* %b) #0 {
+define void @scvtf_v32i16_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v32i16_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: scvtf z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%res = sitofp <32 x i16> %op1 to <32 x half>
- store <32 x half> %res, <32 x half>* %b
+ store <32 x half> %res, ptr %b
ret void
}
-define void @scvtf_v64i16_v64f16(<64 x i16>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @scvtf_v64i16_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v64i16_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
+ %op1 = load <64 x i16>, ptr %a
%res = sitofp <64 x i16> %op1 to <64 x half>
- store <64 x half> %res, <64 x half>* %b
+ store <64 x half> %res, ptr %b
ret void
}
-define void @scvtf_v128i16_v128f16(<128 x i16>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @scvtf_v128i16_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v128i16_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
+ %op1 = load <128 x i16>, ptr %a
%res = sitofp <128 x i16> %op1 to <128 x half>
- store <128 x half> %res, <128 x half>* %b
+ store <128 x half> %res, ptr %b
ret void
}
ret <4 x float> %res
}
-define void @scvtf_v8i16_v8f32(<8 x i16>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @scvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i16_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x i16>, <8 x i16>* %a
+ %op1 = load <8 x i16>, ptr %a
%res = sitofp <8 x i16> %op1 to <8 x float>
- store <8 x float> %res, <8 x float>* %b
+ store <8 x float> %res, ptr %b
ret void
}
-define void @scvtf_v16i16_v16f32(<16 x i16>* %a, <16 x float>* %b) #0 {
+define void @scvtf_v16i16_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v16i16_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: scvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
+ %op1 = load <16 x i16>, ptr %a
%res = sitofp <16 x i16> %op1 to <16 x float>
- store <16 x float> %res, <16 x float>* %b
+ store <16 x float> %res, ptr %b
ret void
}
-define void @scvtf_v32i16_v32f32(<32 x i16>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @scvtf_v32i16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v32i16_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%res = sitofp <32 x i16> %op1 to <32 x float>
- store <32 x float> %res, <32 x float>* %b
+ store <32 x float> %res, ptr %b
ret void
}
-define void @scvtf_v64i16_v64f32(<64 x i16>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @scvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v64i16_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
+ %op1 = load <64 x i16>, ptr %a
%res = sitofp <64 x i16> %op1 to <64 x float>
- store <64 x float> %res, <64 x float>* %b
+ store <64 x float> %res, ptr %b
ret void
}
ret <2 x double> %res
}
-define void @scvtf_v4i16_v4f64(<4 x i16>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i16_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x i16>, <4 x i16>* %a
+ %op1 = load <4 x i16>, ptr %a
%res = sitofp <4 x i16> %op1 to <4 x double>
- store <4 x double> %res, <4 x double>* %b
+ store <4 x double> %res, ptr %b
ret void
}
-define void @scvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
+define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i16_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: scvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i16>, <8 x i16>* %a
+ %op1 = load <8 x i16>, ptr %a
%res = sitofp <8 x i16> %op1 to <8 x double>
- store <8 x double> %res, <8 x double>* %b
+ store <8 x double> %res, ptr %b
ret void
}
-define void @scvtf_v16i16_v16f64(<16 x i16>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i16_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
+ %op1 = load <16 x i16>, ptr %a
%res = sitofp <16 x i16> %op1 to <16 x double>
- store <16 x double> %res, <16 x double>* %b
+ store <16 x double> %res, ptr %b
ret void
}
-define void @scvtf_v32i16_v32f64(<32 x i16>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @scvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i16_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%res = sitofp <32 x i16> %op1 to <32 x double>
- store <32 x double> %res, <32 x double>* %b
+ store <32 x double> %res, ptr %b
ret void
}
ret <4 x half> %res
}
-define <8 x half> @scvtf_v8i32_v8f16(<8 x i32>* %a) vscale_range(2,0) #0 {
+define <8 x half> @scvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i32_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
+ %op1 = load <8 x i32>, ptr %a
%res = sitofp <8 x i32> %op1 to <8 x half>
ret <8 x half> %res
}
-define void @scvtf_v16i32_v16f16(<16 x i32>* %a, <16 x half>* %b) #0 {
+define void @scvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v16i32_v16f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%res = sitofp <16 x i32> %op1 to <16 x half>
- store <16 x half> %res, <16 x half>* %b
+ store <16 x half> %res, ptr %b
ret void
}
-define void @scvtf_v32i32_v32f16(<32 x i32>* %a, <32 x half>* %b) vscale_range(8,0) #0 {
+define void @scvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v32i32_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
+ %op1 = load <32 x i32>, ptr %a
%res = sitofp <32 x i32> %op1 to <32 x half>
- store <32 x half> %res, <32 x half>* %b
+ store <32 x half> %res, ptr %b
ret void
}
-define void @scvtf_v64i32_v64f16(<64 x i32>* %a, <64 x half>* %b) vscale_range(16,0) #0 {
+define void @scvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v64i32_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
+ %op1 = load <64 x i32>, ptr %a
%res = sitofp <64 x i32> %op1 to <64 x half>
- store <64 x half> %res, <64 x half>* %b
+ store <64 x half> %res, ptr %b
ret void
}
ret <4 x float> %res
}
-define void @scvtf_v8i32_v8f32(<8 x i32>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i32_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
+ %op1 = load <8 x i32>, ptr %a
%res = sitofp <8 x i32> %op1 to <8 x float>
- store <8 x float> %res, <8 x float>* %b
+ store <8 x float> %res, ptr %b
ret void
}
-define void @scvtf_v16i32_v16f32(<16 x i32>* %a, <16 x float>* %b) #0 {
+define void @scvtf_v16i32_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v16i32_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: scvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%res = sitofp <16 x i32> %op1 to <16 x float>
- store <16 x float> %res, <16 x float>* %b
+ store <16 x float> %res, ptr %b
ret void
}
-define void @scvtf_v32i32_v32f32(<32 x i32>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @scvtf_v32i32_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v32i32_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
+ %op1 = load <32 x i32>, ptr %a
%res = sitofp <32 x i32> %op1 to <32 x float>
- store <32 x float> %res, <32 x float>* %b
+ store <32 x float> %res, ptr %b
ret void
}
-define void @scvtf_v64i32_v64f32(<64 x i32>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @scvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v64i32_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
+ %op1 = load <64 x i32>, ptr %a
%res = sitofp <64 x i32> %op1 to <64 x float>
- store <64 x float> %res, <64 x float>* %b
+ store <64 x float> %res, ptr %b
ret void
}
ret <2 x double> %res
}
-define void @scvtf_v4i32_v4f64(<4 x i32>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @scvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i32_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x i32>, <4 x i32>* %a
+ %op1 = load <4 x i32>, ptr %a
%res = sitofp <4 x i32> %op1 to <4 x double>
- store <4 x double> %res, <4 x double>* %b
+ store <4 x double> %res, ptr %b
ret void
}
-define void @scvtf_v8i32_v8f64(<8 x i32>* %a, <8 x double>* %b) #0 {
+define void @scvtf_v8i32_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i32_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: scvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
+ %op1 = load <8 x i32>, ptr %a
%res = sitofp <8 x i32> %op1 to <8 x double>
- store <8 x double> %res, <8 x double>* %b
+ store <8 x double> %res, ptr %b
ret void
}
-define void @scvtf_v16i32_v16f64(<16 x i32>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i32_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%res = sitofp <16 x i32> %op1 to <16 x double>
- store <16 x double> %res, <16 x double>* %b
+ store <16 x double> %res, ptr %b
ret void
}
-define void @scvtf_v32i32_v32f64(<32 x i32>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @scvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i32_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
+ %op1 = load <32 x i32>, ptr %a
%res = sitofp <32 x i32> %op1 to <32 x double>
- store <32 x double> %res, <32 x double>* %b
+ store <32 x double> %res, ptr %b
ret void
}
ret <2 x half> %res
}
-define <4 x half> @scvtf_v4i64_v4f16(<4 x i64>* %a) vscale_range(2,0) #0 {
+define <4 x half> @scvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i64_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
+ %op1 = load <4 x i64>, ptr %a
%res = sitofp <4 x i64> %op1 to <4 x half>
ret <4 x half> %res
}
-define <8 x half> @scvtf_v8i64_v8f16(<8 x i64>* %a) #0 {
+define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%res = sitofp <8 x i64> %op1 to <8 x half>
ret <8 x half> %res
}
-define void @scvtf_v16i64_v16f16(<16 x i64>* %a, <16 x half>* %b) vscale_range(8,0) #0 {
+define void @scvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i64_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
+ %op1 = load <16 x i64>, ptr %a
%res = sitofp <16 x i64> %op1 to <16 x half>
- store <16 x half> %res, <16 x half>* %b
+ store <16 x half> %res, ptr %b
ret void
}
-define void @scvtf_v32i64_v32f16(<32 x i64>* %a, <32 x half>* %b) vscale_range(16,0) #0 {
+define void @scvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i64_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
+ %op1 = load <32 x i64>, ptr %a
%res = sitofp <32 x i64> %op1 to <32 x half>
- store <32 x half> %res, <32 x half>* %b
+ store <32 x half> %res, ptr %b
ret void
}
ret <2 x float> %res
}
-define <4 x float> @scvtf_v4i64_v4f32(<4 x i64>* %a) vscale_range(2,0) #0 {
+define <4 x float> @scvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i64_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
+ %op1 = load <4 x i64>, ptr %a
%res = sitofp <4 x i64> %op1 to <4 x float>
ret <4 x float> %res
}
-define void @scvtf_v8i64_v8f32(<8 x i64>* %a, <8 x float>* %b) #0 {
+define void @scvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%res = sitofp <8 x i64> %op1 to <8 x float>
- store <8 x float> %res, <8 x float>* %b
+ store <8 x float> %res, ptr %b
ret void
}
-define void @scvtf_v16i64_v16f32(<16 x i64>* %a, <16 x float>* %b) vscale_range(8,0) #0 {
+define void @scvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i64_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
+ %op1 = load <16 x i64>, ptr %a
%res = sitofp <16 x i64> %op1 to <16 x float>
- store <16 x float> %res, <16 x float>* %b
+ store <16 x float> %res, ptr %b
ret void
}
-define void @scvtf_v32i64_v32f32(<32 x i64>* %a, <32 x float>* %b) vscale_range(16,0) #0 {
+define void @scvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i64_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
+ %op1 = load <32 x i64>, ptr %a
%res = sitofp <32 x i64> %op1 to <32 x float>
- store <32 x float> %res, <32 x float>* %b
+ store <32 x float> %res, ptr %b
ret void
}
ret <2 x double> %res
}
-define void @scvtf_v4i64_v4f64(<4 x i64>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @scvtf_v4i64_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i64_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
+ %op1 = load <4 x i64>, ptr %a
%res = sitofp <4 x i64> %op1 to <4 x double>
- store <4 x double> %res, <4 x double>* %b
+ store <4 x double> %res, ptr %b
ret void
}
-define void @scvtf_v8i64_v8f64(<8 x i64>* %a, <8 x double>* %b) #0 {
+define void @scvtf_v8i64_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: scvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%res = sitofp <8 x i64> %op1 to <8 x double>
- store <8 x double> %res, <8 x double>* %b
+ store <8 x double> %res, ptr %b
ret void
}
-define void @scvtf_v16i64_v16f64(<16 x i64>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @scvtf_v16i64_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i64_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
+ %op1 = load <16 x i64>, ptr %a
%res = sitofp <16 x i64> %op1 to <16 x double>
- store <16 x double> %res, <16 x double>* %b
+ store <16 x double> %res, ptr %b
ret void
}
-define void @scvtf_v32i64_v32f64(<32 x i64>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @scvtf_v32i64_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i64_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
+ %op1 = load <32 x i64>, ptr %a
%res = sitofp <32 x i64> %op1 to <32 x double>
- store <32 x double> %res, <32 x double>* %b
+ store <32 x double> %res, ptr %b
ret void
}
ret <16 x i8> %sel
}
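; Integer selects: the icmp becomes a predicate-producing compare and the
; select a predicated sel, with the result written back under the original
; vl-sized predicate.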
-define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @select_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%mask = icmp eq <32 x i8> %op1, %op2
%sel = select <32 x i1> %mask, <32 x i8> %op1, <32 x i8> %op2
- store <32 x i8> %sel, <32 x i8>* %a
+ store <32 x i8> %sel, ptr %a
ret void
}
-define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @select_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%mask = icmp eq <64 x i8> %op1, %op2
%sel = select <64 x i1> %mask, <64 x i8> %op1, <64 x i8> %op2
- store <64 x i8> %sel, <64 x i8>* %a
+ store <64 x i8> %sel, ptr %a
ret void
}
-define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @select_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%mask = icmp eq <128 x i8> %op1, %op2
%sel = select <128 x i1> %mask, <128 x i8> %op1, <128 x i8> %op2
- store <128 x i8> %sel, <128 x i8>* %a
+ store <128 x i8> %sel, ptr %a
ret void
}
-define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @select_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%mask = icmp eq <256 x i8> %op1, %op2
%sel = select <256 x i1> %mask, <256 x i8> %op1, <256 x i8> %op2
- store <256 x i8> %sel, <256 x i8>* %a
+ store <256 x i8> %sel, ptr %a
ret void
}
ret <8 x i16> %sel
}
-define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @select_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%mask = icmp eq <16 x i16> %op1, %op2
%sel = select <16 x i1> %mask, <16 x i16> %op1, <16 x i16> %op2
- store <16 x i16> %sel, <16 x i16>* %a
+ store <16 x i16> %sel, ptr %a
ret void
}
-define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @select_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%mask = icmp eq <32 x i16> %op1, %op2
%sel = select <32 x i1> %mask, <32 x i16> %op1, <32 x i16> %op2
- store <32 x i16> %sel, <32 x i16>* %a
+ store <32 x i16> %sel, ptr %a
ret void
}
-define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @select_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%mask = icmp eq <64 x i16> %op1, %op2
%sel = select <64 x i1> %mask, <64 x i16> %op1, <64 x i16> %op2
- store <64 x i16> %sel, <64 x i16>* %a
+ store <64 x i16> %sel, ptr %a
ret void
}
-define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @select_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%mask = icmp eq <128 x i16> %op1, %op2
%sel = select <128 x i1> %mask, <128 x i16> %op1, <128 x i16> %op2
- store <128 x i16> %sel, <128 x i16>* %a
+ store <128 x i16> %sel, ptr %a
ret void
}
ret <4 x i32> %sel
}
-define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @select_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%mask = icmp eq <8 x i32> %op1, %op2
%sel = select <8 x i1> %mask, <8 x i32> %op1, <8 x i32> %op2
- store <8 x i32> %sel, <8 x i32>* %a
+ store <8 x i32> %sel, ptr %a
ret void
}
-define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @select_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%mask = icmp eq <16 x i32> %op1, %op2
%sel = select <16 x i1> %mask, <16 x i32> %op1, <16 x i32> %op2
- store <16 x i32> %sel, <16 x i32>* %a
+ store <16 x i32> %sel, ptr %a
ret void
}
-define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @select_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%mask = icmp eq <32 x i32> %op1, %op2
%sel = select <32 x i1> %mask, <32 x i32> %op1, <32 x i32> %op2
- store <32 x i32> %sel, <32 x i32>* %a
+ store <32 x i32> %sel, ptr %a
ret void
}
-define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @select_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%mask = icmp eq <64 x i32> %op1, %op2
%sel = select <64 x i1> %mask, <64 x i32> %op1, <64 x i32> %op2
- store <64 x i32> %sel, <64 x i32>* %a
+ store <64 x i32> %sel, ptr %a
ret void
}
ret <2 x i64> %sel
}
-define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @select_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%mask = icmp eq <4 x i64> %op1, %op2
%sel = select <4 x i1> %mask, <4 x i64> %op1, <4 x i64> %op2
- store <4 x i64> %sel, <4 x i64>* %a
+ store <4 x i64> %sel, ptr %a
ret void
}
-define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @select_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%mask = icmp eq <8 x i64> %op1, %op2
%sel = select <8 x i1> %mask, <8 x i64> %op1, <8 x i64> %op2
- store <8 x i64> %sel, <8 x i64>* %a
+ store <8 x i64> %sel, ptr %a
ret void
}
-define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @select_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%mask = icmp eq <16 x i64> %op1, %op2
%sel = select <16 x i1> %mask, <16 x i64> %op1, <16 x i64> %op2
- store <16 x i64> %sel, <16 x i64>* %a
+ store <16 x i64> %sel, ptr %a
ret void
}
-define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @select_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%mask = icmp eq <32 x i64> %op1, %op2
%sel = select <32 x i1> %mask, <32 x i64> %op1, <32 x i64> %op2
- store <32 x i64> %sel, <32 x i64>* %a
+ store <32 x i64> %sel, ptr %a
ret void
}
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
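; These tests take a shufflevector of a wide loaded value (splatting lane 14
; into a small fixed-length result) while the wide vector itself stays live
; through the add and store, so the wide load cannot simply be narrowed.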
-define <4 x i32> @test(<16 x i32>* %arg1, <16 x i32>* %arg2) {
+define <4 x i32> @test(ptr %arg1, ptr %arg2) {
; CHECK-LABEL: test:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x8, #8
; CHECK-NEXT: st1w { z2.s }, p0, [x0]
; CHECK-NEXT: ret
entry:
- %0 = load <16 x i32>, <16 x i32>* %arg1, align 256
- %1 = load <16 x i32>, <16 x i32>* %arg2, align 256
+ %0 = load <16 x i32>, ptr %arg1, align 256
+ %1 = load <16 x i32>, ptr %arg2, align 256
%shvec = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 14, i32 14, i32 14, i32 14>
%2 = add <16 x i32> %0, %0
- store <16 x i32> %2, <16 x i32>* %arg1, align 256
+ store <16 x i32> %2, ptr %arg1, align 256
ret <4 x i32> %shvec
}
-define <2 x i32> @test2(<16 x i32>* %arg1, <16 x i32>* %arg2) {
+define <2 x i32> @test2(ptr %arg1, ptr %arg2) {
; CHECK-LABEL: test2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x8, #8
; CHECK-NEXT: st1w { z2.s }, p0, [x0]
; CHECK-NEXT: ret
entry:
- %0 = load <16 x i32>, <16 x i32>* %arg1, align 256
- %1 = load <16 x i32>, <16 x i32>* %arg2, align 256
+ %0 = load <16 x i32>, ptr %arg1, align 256
+ %1 = load <16 x i32>, ptr %arg2, align 256
%shvec = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 14, i32 14>
%2 = add <16 x i32> %0, %0
- store <16 x i32> %2, <16 x i32>* %arg1, align 256
+ store <16 x i32> %2, ptr %arg1, align 256
ret <2 x i32> %shvec
}
target triple = "aarch64-unknown-linux-gnu"
; Don't use SVE for 64-bit vectors.
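; A 64-bit fixed-length vector fits in a single NEON d-register, so a plain
; ldr d0 is all that is needed here.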
-define <2 x float> @load_v2f32(<2 x float>* %a) #0 {
+define <2 x float> @load_v2f32(ptr %a) #0 {
; CHECK-LABEL: load_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
- %load = load <2 x float>, <2 x float>* %a
+ %load = load <2 x float>, ptr %a
ret <2 x float> %load
}
; Don't use SVE for 128-bit vectors.
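; Likewise, a 128-bit vector fits in a NEON q-register and loads with ldr q0.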
-define <4 x float> @load_v4f32(<4 x float>* %a) #0 {
+define <4 x float> @load_v4f32(ptr %a) #0 {
; CHECK-LABEL: load_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
- %load = load <4 x float>, <4 x float>* %a
+ %load = load <4 x float>, ptr %a
ret <4 x float> %load
}
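; From 256 bits upwards the loads are expected to use SVE, with a ptrue
; predicate pinned to the exact fixed vector length (here vl8 32-bit lanes)
; and the result returned through the indirect-result register x8.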
-define <8 x float> @load_v8f32(<8 x float>* %a) #0 {
+define <8 x float> @load_v8f32(ptr %a) #0 {
; CHECK-LABEL: load_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %load = load <8 x float>, <8 x float>* %a
+ %load = load <8 x float>, ptr %a
ret <8 x float> %load
}
-define <16 x float> @load_v16f32(<16 x float>* %a) #0 {
+define <16 x float> @load_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: load_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %load = load <16 x float>, <16 x float>* %a
+ %load = load <16 x float>, ptr %a
ret <16 x float> %load
}
-define <32 x float> @load_v32f32(<32 x float>* %a) #0 {
+define <32 x float> @load_v32f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: load_v32f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #16
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %load = load <32 x float>, <32 x float>* %a
+ %load = load <32 x float>, ptr %a
ret <32 x float> %load
}
-define <64 x float> @load_v64f32(<64 x float>* %a) #0 {
+define <64 x float> @load_v64f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: load_v64f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
- %load = load <64 x float>, <64 x float>* %a
+ %load = load <64 x float>, ptr %a
ret <64 x float> %load
}
ret i8 %res
}
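; AND reductions: llvm.vector.reduce.and is expected to become a single
; predicated andv, with the scalar result copied out of the low element
; via fmov.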
-define i8 @andv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define i8 @andv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %op)
ret i8 %res
}
-define i8 @andv_v64i8(<64 x i8>* %a) #0 {
+define i8 @andv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: andv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> %op)
ret i8 %res
}
-define i8 @andv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define i8 @andv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: andv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> %op)
ret i8 %res
}
-define i8 @andv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define i8 @andv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: andv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v256i8(<256 x i8> %op)
ret i8 %res
}
ret i16 %res
}
-define i16 @andv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define i16 @andv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %op)
ret i16 %res
}
-define i16 @andv_v32i16(<32 x i16>* %a) #0 {
+define i16 @andv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: andv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> %op)
ret i16 %res
}
-define i16 @andv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define i16 @andv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: andv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> %op)
ret i16 %res
}
-define i16 @andv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define i16 @andv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: andv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> %op)
ret i16 %res
}
ret i32 %res
}
-define i32 @andv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define i32 @andv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %op)
ret i32 %res
}
-define i32 @andv_v16i32(<16 x i32>* %a) #0 {
+define i32 @andv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: andv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %op)
ret i32 %res
}
-define i32 @andv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define i32 @andv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: andv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> %op)
ret i32 %res
}
-define i32 @andv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define i32 @andv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: andv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> %op)
ret i32 %res
}
ret i64 %res
}
-define i64 @andv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define i64 @andv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %op)
ret i64 %res
}
-define i64 @andv_v8i64(<8 x i64>* %a) #0 {
+define i64 @andv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: andv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %op)
ret i64 %res
}
-define i64 @andv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define i64 @andv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: andv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %op)
ret i64 %res
}
-define i64 @andv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define i64 @andv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: andv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> %op)
ret i64 %res
}
ret i8 %res
}
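; XOR reductions follow the same pattern, using eorv.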
-define i8 @eorv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define i8 @eorv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %op)
ret i8 %res
}
-define i8 @eorv_v64i8(<64 x i8>* %a) #0 {
+define i8 @eorv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: eorv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> %op)
ret i8 %res
}
-define i8 @eorv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define i8 @eorv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: eorv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> %op)
ret i8 %res
}
-define i8 @eorv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define i8 @eorv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: eorv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v256i8(<256 x i8> %op)
ret i8 %res
}
ret i16 %res
}
-define i16 @eorv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define i16 @eorv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %op)
ret i16 %res
}
-define i16 @eorv_v32i16(<32 x i16>* %a) #0 {
+define i16 @eorv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: eorv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> %op)
ret i16 %res
}
-define i16 @eorv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define i16 @eorv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: eorv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> %op)
ret i16 %res
}
-define i16 @eorv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define i16 @eorv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: eorv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> %op)
ret i16 %res
}
ret i32 %res
}
-define i32 @eorv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define i32 @eorv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %op)
ret i32 %res
}
-define i32 @eorv_v16i32(<16 x i32>* %a) #0 {
+define i32 @eorv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: eorv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %op)
ret i32 %res
}
-define i32 @eorv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define i32 @eorv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: eorv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> %op)
ret i32 %res
}
-define i32 @eorv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define i32 @eorv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: eorv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> %op)
ret i32 %res
}
ret i64 %res
}
-define i64 @eorv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define i64 @eorv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %op)
ret i64 %res
}
-define i64 @eorv_v8i64(<8 x i64>* %a) #0 {
+define i64 @eorv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: eorv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %op)
ret i64 %res
}
-define i64 @eorv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define i64 @eorv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: eorv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %op)
ret i64 %res
}
-define i64 @eorv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define i64 @eorv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: eorv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> %op)
ret i64 %res
}
ret i8 %res
}
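; OR reductions likewise map onto orv.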
-define i8 @orv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define i8 @orv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %op)
ret i8 %res
}
-define i8 @orv_v64i8(<64 x i8>* %a) #0 {
+define i8 @orv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: orv b0, p0, z0.b
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %op)
ret i8 %res
}
-define i8 @orv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define i8 @orv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: orv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %op)
ret i8 %res
}
-define i8 @orv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define i8 @orv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: orv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v256i8(<256 x i8> %op)
ret i8 %res
}
ret i16 %res
}
-define i16 @orv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define i16 @orv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %op)
ret i16 %res
}
-define i16 @orv_v32i16(<32 x i16>* %a) #0 {
+define i16 @orv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: orv h0, p0, z0.h
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %op)
ret i16 %res
}
-define i16 @orv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define i16 @orv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: orv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %op)
ret i16 %res
}
-define i16 @orv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define i16 @orv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: orv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> %op)
ret i16 %res
}
ret i32 %res
}
-define i32 @orv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define i32 @orv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %op)
ret i32 %res
}
-define i32 @orv_v16i32(<16 x i32>* %a) #0 {
+define i32 @orv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: orv s0, p0, z0.s
; VBITS_GE_512-NEXT: fmov w0, s0
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %op)
ret i32 %res
}
-define i32 @orv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define i32 @orv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: orv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %op)
ret i32 %res
}
-define i32 @orv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define i32 @orv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: orv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> %op)
ret i32 %res
}
ret i64 %res
}
-define i64 @orv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define i64 @orv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %op)
ret i64 %res
}
-define i64 @orv_v8i64(<8 x i64>* %a) #0 {
+define i64 @orv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: orv d0, p0, z0.d
; VBITS_GE_512-NEXT: fmov x0, d0
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %op)
ret i64 %res
}
-define i64 @orv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define i64 @orv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: orv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %op)
ret i64 %res
}
-define i64 @orv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define i64 @orv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: orv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> %op)
ret i64 %res
}
; LD1B
;
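; i8 gathers: each lane carries a full 64-bit address, so the data is gathered
; into .d element containers (ld1b { z0.d }, [z0.d]) and narrowed on the way
; out. Every test below uses an all-true mask; the general call shape, matching
; the declarations at the end of the file, is
;   call <N x i8> @llvm.masked.gather.vNi8(<N x ptr> %ptrs, i32 align, <N x i1> %mask, <N x i8> %passthru)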
-define void @masked_gather_v2i8(<2 x i8>* %a, <2 x i8*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: st1b { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <2 x i8*>, <2 x i8*>* %b
- %vals = call <2 x i8> @llvm.masked.gather.v2i8(<2 x i8*> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i8> undef)
- store <2 x i8> %vals, <2 x i8>* %a
+ %ptrs = load <2 x ptr>, ptr %b
+ %vals = call <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i8> undef)
+ store <2 x i8> %vals, ptr %a
ret void
}
-define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1b { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <4 x i8*>, <4 x i8*>* %b
- %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
- store <4 x i8> %vals, <4 x i8>* %a
+ %ptrs = load <4 x ptr>, ptr %b
+ %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+ store <4 x i8> %vals, ptr %a
ret void
}
-define void @masked_gather_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
+define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: str d0, [x0]
; VBITS_GE_512-NEXT: ret
- %ptrs = load <8 x i8*>, <8 x i8*>* %b
- %vals = call <8 x i8> @llvm.masked.gather.v8i8(<8 x i8*> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
- store <8 x i8> %vals, <8 x i8>* %a
+ %ptrs = load <8 x ptr>, ptr %b
+ %vals = call <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
+ store <8 x i8> %vals, ptr %a
ret void
}
-define void @masked_gather_v16i8(<16 x i8>* %a, <16 x i8*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <16 x i8*>, <16 x i8*>* %b
- %vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x i8*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+ %ptrs = load <16 x ptr>, ptr %b
+ %vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
- store <16 x i8> %vals, <16 x i8>* %a
+ store <16 x i8> %vals, ptr %a
ret void
}
-define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1b { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <32 x i8*>, <32 x i8*>* %b
- %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+ %ptrs = load <32 x ptr>, ptr %b
+ %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
- store <32 x i8> %vals, <32 x i8>* %a
+ store <32 x i8> %vals, ptr %a
ret void
}
; LD1H
;
-define void @masked_gather_v2i16(<2 x i16>* %a, <2 x i16*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <2 x i16*>, <2 x i16*>* %b
- %vals = call <2 x i16> @llvm.masked.gather.v2i16(<2 x i16*> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)
- store <2 x i16> %vals, <2 x i16>* %a
+ %ptrs = load <2 x ptr>, ptr %b
+ %vals = call <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)
+ store <2 x i16> %vals, ptr %a
ret void
}
-define void @masked_gather_v4i16(<4 x i16>* %a, <4 x i16*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <4 x i16*>, <4 x i16*>* %b
- %vals = call <4 x i16> @llvm.masked.gather.v4i16(<4 x i16*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
- store <4 x i16> %vals, <4 x i16>* %a
+ %ptrs = load <4 x ptr>, ptr %b
+ %vals = call <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
+ store <4 x i16> %vals, ptr %a
ret void
}
-define void @masked_gather_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
+define void @masked_gather_v8i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: str q0, [x0]
; VBITS_GE_512-NEXT: ret
- %ptrs = load <8 x i16*>, <8 x i16*>* %b
- %vals = call <8 x i16> @llvm.masked.gather.v8i16(<8 x i16*> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
- store <8 x i16> %vals, <8 x i16>* %a
+ %ptrs = load <8 x ptr>, ptr %b
+ %vals = call <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
+ store <8 x i16> %vals, ptr %a
ret void
}
-define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <16 x i16*>, <16 x i16*>* %b
- %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+ %ptrs = load <16 x ptr>, ptr %b
+ %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> undef)
- store <16 x i16> %vals, <16 x i16>* %a
+ store <16 x i16> %vals, ptr %a
ret void
}
-define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <32 x i16*>, <32 x i16*>* %b
- %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+ %ptrs = load <32 x ptr>, ptr %b
+ %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> undef)
- store <32 x i16> %vals, <32 x i16>* %a
+ store <32 x i16> %vals, ptr %a
ret void
}
; LD1W
;
-define void @masked_gather_v2i32(<2 x i32>* %a, <2 x i32*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <2 x i32*>, <2 x i32*>* %b
- %vals = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
- store <2 x i32> %vals, <2 x i32>* %a
+ %ptrs = load <2 x ptr>, ptr %b
+ %vals = call <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
+ store <2 x i32> %vals, ptr %a
ret void
}
-define void @masked_gather_v4i32(<4 x i32>* %a, <4 x i32*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <4 x i32*>, <4 x i32*>* %b
- %vals = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
- store <4 x i32> %vals, <4 x i32>* %a
+ %ptrs = load <4 x ptr>, ptr %b
+ %vals = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ store <4 x i32> %vals, ptr %a
ret void
}
-define void @masked_gather_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 {
+define void @masked_gather_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %ptrs = load <8 x i32*>, <8 x i32*>* %b
- %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
- store <8 x i32> %vals, <8 x i32>* %a
+ %ptrs = load <8 x ptr>, ptr %b
+ %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+ store <8 x i32> %vals, ptr %a
ret void
}
-define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <16 x i32*>, <16 x i32*>* %b
- %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+ %ptrs = load <16 x ptr>, ptr %b
+ %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef)
- store <16 x i32> %vals, <16 x i32>* %a
+ store <16 x i32> %vals, ptr %a
ret void
}
-define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <32 x i32*>, <32 x i32*>* %b
- %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+ %ptrs = load <32 x ptr>, ptr %b
+ %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> undef)
- store <32 x i32> %vals, <32 x i32>* %a
+ store <32 x i32> %vals, ptr %a
ret void
}
; LD1D
;
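; For i64 elements the data width matches the 64-bit address lanes, so the
; gather is a full-width ld1d with no widening or narrowing step.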
-define void @masked_gather_v2i64(<2 x i64>* %a, <2 x i64*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <2 x i64*>, <2 x i64*>* %b
- %vals = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
- store <2 x i64> %vals, <2 x i64>* %a
+ %ptrs = load <2 x ptr>, ptr %b
+ %vals = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
+ store <2 x i64> %vals, ptr %a
ret void
}
-define void @masked_gather_v4i64(<4 x i64>* %a, <4 x i64*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <4 x i64*>, <4 x i64*>* %b
- %vals = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> undef)
- store <4 x i64> %vals, <4 x i64>* %a
+ %ptrs = load <4 x ptr>, ptr %b
+ %vals = call <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> undef)
+ store <4 x i64> %vals, ptr %a
ret void
}
-define void @masked_gather_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 {
+define void @masked_gather_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %ptrs = load <8 x i64*>, <8 x i64*>* %b
- %vals = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
- store <8 x i64> %vals, <8 x i64>* %a
+ %ptrs = load <8 x ptr>, ptr %b
+ %vals = call <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
+ store <8 x i64> %vals, ptr %a
ret void
}
-define void @masked_gather_v16i64(<16 x i64>* %a, <16 x i64*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <16 x i64*>, <16 x i64*>* %b
- %vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+ %ptrs = load <16 x ptr>, ptr %b
+ %vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i64> undef)
- store <16 x i64> %vals, <16 x i64>* %a
+ store <16 x i64> %vals, ptr %a
ret void
}
-define void @masked_gather_v32i64(<32 x i64>* %a, <32 x i64*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %ptrs = load <32 x i64*>, <32 x i64*>* %b
- %vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x i64*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+ %ptrs = load <32 x ptr>, ptr %b
+ %vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i64> undef)
- store <32 x i64> %vals, <32 x i64>* %a
+ store <32 x i64> %vals, ptr %a
ret void
}
-declare <2 x i8> @llvm.masked.gather.v2i8(<2 x i8*>, i32, <2 x i1>, <2 x i8>)
-declare <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
-declare <8 x i8> @llvm.masked.gather.v8i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>)
-declare <16 x i8> @llvm.masked.gather.v16i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
-declare <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>)
+declare <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
+declare <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
+declare <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
+declare <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
+declare <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
-declare <2 x i16> @llvm.masked.gather.v2i16(<2 x i16*>, i32, <2 x i1>, <2 x i16>)
-declare <4 x i16> @llvm.masked.gather.v4i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
-declare <8 x i16> @llvm.masked.gather.v8i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>)
-declare <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*>, i32, <16 x i1>, <16 x i16>)
-declare <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*>, i32, <32 x i1>, <32 x i16>)
+declare <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
+declare <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
+declare <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
+declare <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
+declare <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr>, i32, <32 x i1>, <32 x i16>)
-declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
-declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
-declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
-declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
-declare <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*>, i32, <32 x i1>, <32 x i32>)
+declare <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
+declare <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
+declare <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr>, i32, <32 x i1>, <32 x i32>)
-declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
-declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
-declare <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)
-declare <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*>, i32, <16 x i1>, <16 x i64>)
-declare <32 x i64> @llvm.masked.gather.v32i64(<32 x i64*>, i32, <32 x i1>, <32 x i64>)
+declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
+declare <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
+declare <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
+declare <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr>, i32, <16 x i1>, <16 x i64>)
+declare <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr>, i32, <32 x i1>, <32 x i64>)
attributes #0 = { "target-features"="+sve" }
; LD1B
;
-define void @masked_gather_v2i8(<2 x i8>* %a, <2 x i8*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: st1b { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %cval = load <2 x i8>, <2 x i8>* %a
- %ptrs = load <2 x i8*>, <2 x i8*>* %b
+ %cval = load <2 x i8>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = icmp eq <2 x i8> %cval, zeroinitializer
- %vals = call <2 x i8> @llvm.masked.gather.v2i8(<2 x i8*> %ptrs, i32 8, <2 x i1> %mask, <2 x i8> undef)
- store <2 x i8> %vals, <2 x i8>* %a
+ %vals = call <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i8> undef)
+ store <2 x i8> %vals, ptr %a
ret void
}
-define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ld1b { z0.d }, p1/z, [z1.d]
; CHECK-NEXT: st1b { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %cval = load <4 x i8>, <4 x i8>* %a
- %ptrs = load <4 x i8*>, <4 x i8*>* %b
+ %cval = load <4 x i8>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = icmp eq <4 x i8> %cval, zeroinitializer
- %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*> %ptrs, i32 8, <4 x i1> %mask, <4 x i8> undef)
- store <4 x i8> %vals, <4 x i8>* %a
+ %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr> %ptrs, i32 8, <4 x i1> %mask, <4 x i8> undef)
+ store <4 x i8> %vals, ptr %a
ret void
}
-define void @masked_gather_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
+define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr d0, [x0]
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: str d0, [x0]
; VBITS_GE_512-NEXT: ret
- %cval = load <8 x i8>, <8 x i8>* %a
- %ptrs = load <8 x i8*>, <8 x i8*>* %b
+ %cval = load <8 x i8>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = icmp eq <8 x i8> %cval, zeroinitializer
- %vals = call <8 x i8> @llvm.masked.gather.v8i8(<8 x i8*> %ptrs, i32 8, <8 x i1> %mask, <8 x i8> undef)
- store <8 x i8> %vals, <8 x i8>* %a
+ %vals = call <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr> %ptrs, i32 8, <8 x i1> %mask, <8 x i8> undef)
+ store <8 x i8> %vals, ptr %a
ret void
}
-define void @masked_gather_v16i8(<16 x i8>* %a, <16 x i8*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %cval = load <16 x i8>, <16 x i8>* %a
- %ptrs = load <16 x i8*>, <16 x i8*>* %b
+ %cval = load <16 x i8>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = icmp eq <16 x i8> %cval, zeroinitializer
- %vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x i8*> %ptrs, i32 8, <16 x i1> %mask, <16 x i8> undef)
- store <16 x i8> %vals, <16 x i8>* %a
+ %vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr> %ptrs, i32 8, <16 x i1> %mask, <16 x i8> undef)
+ store <16 x i8> %vals, ptr %a
ret void
}
-define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1b { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cval = load <32 x i8>, <32 x i8>* %a
- %ptrs = load <32 x i8*>, <32 x i8*>* %b
+ %cval = load <32 x i8>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = icmp eq <32 x i8> %cval, zeroinitializer
- %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*> %ptrs, i32 8, <32 x i1> %mask, <32 x i8> undef)
- store <32 x i8> %vals, <32 x i8>* %a
+ %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x i8> undef)
+ store <32 x i8> %vals, ptr %a
ret void
}
; LD1H
;
-define void @masked_gather_v2i16(<2 x i16>* %a, <2 x i16*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %cval = load <2 x i16>, <2 x i16>* %a
- %ptrs = load <2 x i16*>, <2 x i16*>* %b
+ %cval = load <2 x i16>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = icmp eq <2 x i16> %cval, zeroinitializer
- %vals = call <2 x i16> @llvm.masked.gather.v2i16(<2 x i16*> %ptrs, i32 8, <2 x i1> %mask, <2 x i16> undef)
- store <2 x i16> %vals, <2 x i16>* %a
+ %vals = call <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i16> undef)
+ store <2 x i16> %vals, ptr %a
ret void
}
-define void @masked_gather_v4i16(<4 x i16>* %a, <4 x i16*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
- %cval = load <4 x i16>, <4 x i16>* %a
- %ptrs = load <4 x i16*>, <4 x i16*>* %b
+ %cval = load <4 x i16>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = icmp eq <4 x i16> %cval, zeroinitializer
- %vals = call <4 x i16> @llvm.masked.gather.v4i16(<4 x i16*> %ptrs, i32 8, <4 x i1> %mask, <4 x i16> undef)
- store <4 x i16> %vals, <4 x i16>* %a
+ %vals = call <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr> %ptrs, i32 8, <4 x i1> %mask, <4 x i16> undef)
+ store <4 x i16> %vals, ptr %a
ret void
}
-define void @masked_gather_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
+define void @masked_gather_v8i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: str q0, [x0]
; VBITS_GE_512-NEXT: ret
- %cval = load <8 x i16>, <8 x i16>* %a
- %ptrs = load <8 x i16*>, <8 x i16*>* %b
+ %cval = load <8 x i16>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = icmp eq <8 x i16> %cval, zeroinitializer
- %vals = call <8 x i16> @llvm.masked.gather.v8i16(<8 x i16*> %ptrs, i32 8, <8 x i1> %mask, <8 x i16> undef)
- store <8 x i16> %vals, <8 x i16>* %a
+ %vals = call <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr> %ptrs, i32 8, <8 x i1> %mask, <8 x i16> undef)
+ store <8 x i16> %vals, ptr %a
ret void
}
-define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1h { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cval = load <16 x i16>, <16 x i16>* %a
- %ptrs = load <16 x i16*>, <16 x i16*>* %b
+ %cval = load <16 x i16>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = icmp eq <16 x i16> %cval, zeroinitializer
- %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*> %ptrs, i32 8, <16 x i1> %mask, <16 x i16> undef)
- store <16 x i16> %vals, <16 x i16>* %a
+ %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr> %ptrs, i32 8, <16 x i1> %mask, <16 x i16> undef)
+ store <16 x i16> %vals, ptr %a
ret void
}
-define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1h { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cval = load <32 x i16>, <32 x i16>* %a
- %ptrs = load <32 x i16*>, <32 x i16*>* %b
+ %cval = load <32 x i16>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = icmp eq <32 x i16> %cval, zeroinitializer
- %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*> %ptrs, i32 8, <32 x i1> %mask, <32 x i16> undef)
- store <32 x i16> %vals, <32 x i16>* %a
+ %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x i16> undef)
+ store <32 x i16> %vals, ptr %a
ret void
}
; LD1W
;
-define void @masked_gather_v2i32(<2 x i32>* %a, <2 x i32*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
- %cval = load <2 x i32>, <2 x i32>* %a
- %ptrs = load <2 x i32*>, <2 x i32*>* %b
+ %cval = load <2 x i32>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = icmp eq <2 x i32> %cval, zeroinitializer
- %vals = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 8, <2 x i1> %mask, <2 x i32> undef)
- store <2 x i32> %vals, <2 x i32>* %a
+ %vals = call <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i32> undef)
+ store <2 x i32> %vals, ptr %a
ret void
}
-define void @masked_gather_v4i32(<4 x i32>* %a, <4 x i32*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %cval = load <4 x i32>, <4 x i32>* %a
- %ptrs = load <4 x i32*>, <4 x i32*>* %b
+ %cval = load <4 x i32>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = icmp eq <4 x i32> %cval, zeroinitializer
- %vals = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 8, <4 x i1> %mask, <4 x i32> undef)
- store <4 x i32> %vals, <4 x i32>* %a
+ %vals = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 8, <4 x i1> %mask, <4 x i32> undef)
+ store <4 x i32> %vals, ptr %a
ret void
}
-define void @masked_gather_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 {
+define void @masked_gather_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z1.d]
; VBITS_GE_512-NEXT: st1w { z0.d }, p1, [x0]
; VBITS_GE_512-NEXT: ret
- %cval = load <8 x i32>, <8 x i32>* %a
- %ptrs = load <8 x i32*>, <8 x i32*>* %b
+ %cval = load <8 x i32>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = icmp eq <8 x i32> %cval, zeroinitializer
- %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 8, <8 x i1> %mask, <8 x i32> undef)
- store <8 x i32> %vals, <8 x i32>* %a
+ %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr> %ptrs, i32 8, <8 x i1> %mask, <8 x i32> undef)
+ store <8 x i32> %vals, ptr %a
ret void
}
-define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1w { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cval = load <16 x i32>, <16 x i32>* %a
- %ptrs = load <16 x i32*>, <16 x i32*>* %b
+ %cval = load <16 x i32>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = icmp eq <16 x i32> %cval, zeroinitializer
- %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 8, <16 x i1> %mask, <16 x i32> undef)
- store <16 x i32> %vals, <16 x i32>* %a
+ %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr> %ptrs, i32 8, <16 x i1> %mask, <16 x i32> undef)
+ store <16 x i32> %vals, ptr %a
ret void
}
-define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1w { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cval = load <32 x i32>, <32 x i32>* %a
- %ptrs = load <32 x i32*>, <32 x i32*>* %b
+ %cval = load <32 x i32>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = icmp eq <32 x i32> %cval, zeroinitializer
- %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*> %ptrs, i32 8, <32 x i1> %mask, <32 x i32> undef)
- store <32 x i32> %vals, <32 x i32>* %a
+ %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x i32> undef)
+ store <32 x i32> %vals, ptr %a
ret void
}
;
; Scalarize 1 x i64 gathers
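; A one-element gather offers no parallelism, so it is expected to be
; scalarized into a compare-and-branch around a scalar load (hence the
; "%else" block in the CHECK lines below) rather than an SVE ld1d gather.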
-define void @masked_gather_v1i64(<1 x i64>* %a, <1 x i64*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v1i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: .LBB15_2: // %else
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
- %cval = load <1 x i64>, <1 x i64>* %a
- %ptrs = load <1 x i64*>, <1 x i64*>* %b
+ %cval = load <1 x i64>, ptr %a
+ %ptrs = load <1 x ptr>, ptr %b
%mask = icmp eq <1 x i64> %cval, zeroinitializer
- %vals = call <1 x i64> @llvm.masked.gather.v1i64(<1 x i64*> %ptrs, i32 8, <1 x i1> %mask, <1 x i64> undef)
- store <1 x i64> %vals, <1 x i64>* %a
+ %vals = call <1 x i64> @llvm.masked.gather.v1i64(<1 x ptr> %ptrs, i32 8, <1 x i1> %mask, <1 x i64> undef)
+ store <1 x i64> %vals, ptr %a
ret void
}
-define void @masked_gather_v2i64(<2 x i64>* %a, <2 x i64*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %cval = load <2 x i64>, <2 x i64>* %a
- %ptrs = load <2 x i64*>, <2 x i64*>* %b
+ %cval = load <2 x i64>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = icmp eq <2 x i64> %cval, zeroinitializer
- %vals = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 8, <2 x i1> %mask, <2 x i64> undef)
- store <2 x i64> %vals, <2 x i64>* %a
+ %vals = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i64> undef)
+ store <2 x i64> %vals, ptr %a
ret void
}
-define void @masked_gather_v4i64(<4 x i64>* %a, <4 x i64*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p1/z, [z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %cval = load <4 x i64>, <4 x i64>* %a
- %ptrs = load <4 x i64*>, <4 x i64*>* %b
+ %cval = load <4 x i64>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = icmp eq <4 x i64> %cval, zeroinitializer
- %vals = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 8, <4 x i1> %mask, <4 x i64> undef)
- store <4 x i64> %vals, <4 x i64>* %a
+ %vals = call <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr> %ptrs, i32 8, <4 x i1> %mask, <4 x i64> undef)
+ store <4 x i64> %vals, ptr %a
ret void
}
-define void @masked_gather_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 {
+define void @masked_gather_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [z1.d]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %cval = load <8 x i64>, <8 x i64>* %a
- %ptrs = load <8 x i64*>, <8 x i64*>* %b
+ %cval = load <8 x i64>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = icmp eq <8 x i64> %cval, zeroinitializer
- %vals = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %ptrs, i32 8, <8 x i1> %mask, <8 x i64> undef)
- store <8 x i64> %vals, <8 x i64>* %a
+ %vals = call <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr> %ptrs, i32 8, <8 x i1> %mask, <8 x i64> undef)
+ store <8 x i64> %vals, ptr %a
ret void
}
-define void @masked_gather_v16i64(<16 x i64>* %a, <16 x i64*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p1/z, [z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %cval = load <16 x i64>, <16 x i64>* %a
- %ptrs = load <16 x i64*>, <16 x i64*>* %b
+ %cval = load <16 x i64>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = icmp eq <16 x i64> %cval, zeroinitializer
- %vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 8, <16 x i1> %mask, <16 x i64> undef)
- store <16 x i64> %vals, <16 x i64>* %a
+ %vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr> %ptrs, i32 8, <16 x i1> %mask, <16 x i64> undef)
+ store <16 x i64> %vals, ptr %a
ret void
}
-define void @masked_gather_v32i64(<32 x i64>* %a, <32 x i64*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p1/z, [z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %cval = load <32 x i64>, <32 x i64>* %a
- %ptrs = load <32 x i64*>, <32 x i64*>* %b
+ %cval = load <32 x i64>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = icmp eq <32 x i64> %cval, zeroinitializer
- %vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x i64*> %ptrs, i32 8, <32 x i1> %mask, <32 x i64> undef)
- store <32 x i64> %vals, <32 x i64>* %a
+ %vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x i64> undef)
+ store <32 x i64> %vals, ptr %a
ret void
}
; LD1H (float)
;
-define void @masked_gather_v2f16(<2 x half>* %a, <2 x half*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
- %cval = load <2 x half>, <2 x half>* %a
- %ptrs = load <2 x half*>, <2 x half*>* %b
+ %cval = load <2 x half>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = fcmp oeq <2 x half> %cval, zeroinitializer
- %vals = call <2 x half> @llvm.masked.gather.v2f16(<2 x half*> %ptrs, i32 8, <2 x i1> %mask, <2 x half> undef)
- store <2 x half> %vals, <2 x half>* %a
+ %vals = call <2 x half> @llvm.masked.gather.v2f16(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x half> undef)
+ store <2 x half> %vals, ptr %a
ret void
}
-define void @masked_gather_v4f16(<4 x half>* %a, <4 x half*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
- %cval = load <4 x half>, <4 x half>* %a
- %ptrs = load <4 x half*>, <4 x half*>* %b
+ %cval = load <4 x half>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = fcmp oeq <4 x half> %cval, zeroinitializer
- %vals = call <4 x half> @llvm.masked.gather.v4f16(<4 x half*> %ptrs, i32 8, <4 x i1> %mask, <4 x half> undef)
- store <4 x half> %vals, <4 x half>* %a
+ %vals = call <4 x half> @llvm.masked.gather.v4f16(<4 x ptr> %ptrs, i32 8, <4 x i1> %mask, <4 x half> undef)
+ store <4 x half> %vals, ptr %a
ret void
}
-define void @masked_gather_v8f16(<8 x half>* %a, <8 x half*>* %b) #0 {
+define void @masked_gather_v8f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: str q0, [x0]
; VBITS_GE_512-NEXT: ret
- %cval = load <8 x half>, <8 x half>* %a
- %ptrs = load <8 x half*>, <8 x half*>* %b
+ %cval = load <8 x half>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = fcmp oeq <8 x half> %cval, zeroinitializer
- %vals = call <8 x half> @llvm.masked.gather.v8f16(<8 x half*> %ptrs, i32 8, <8 x i1> %mask, <8 x half> undef)
- store <8 x half> %vals, <8 x half>* %a
+ %vals = call <8 x half> @llvm.masked.gather.v8f16(<8 x ptr> %ptrs, i32 8, <8 x i1> %mask, <8 x half> undef)
+ store <8 x half> %vals, ptr %a
ret void
}
-define void @masked_gather_v16f16(<16 x half>* %a, <16 x half*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1h { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cval = load <16 x half>, <16 x half>* %a
- %ptrs = load <16 x half*>, <16 x half*>* %b
+ %cval = load <16 x half>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = fcmp oeq <16 x half> %cval, zeroinitializer
- %vals = call <16 x half> @llvm.masked.gather.v16f16(<16 x half*> %ptrs, i32 8, <16 x i1> %mask, <16 x half> undef)
- store <16 x half> %vals, <16 x half>* %a
+ %vals = call <16 x half> @llvm.masked.gather.v16f16(<16 x ptr> %ptrs, i32 8, <16 x i1> %mask, <16 x half> undef)
+ store <16 x half> %vals, ptr %a
ret void
}
-define void @masked_gather_v32f16(<32 x half>* %a, <32 x half*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1h { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cval = load <32 x half>, <32 x half>* %a
- %ptrs = load <32 x half*>, <32 x half*>* %b
+ %cval = load <32 x half>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = fcmp oeq <32 x half> %cval, zeroinitializer
- %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
- store <32 x half> %vals, <32 x half>* %a
+ %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
+ store <32 x half> %vals, ptr %a
ret void
}
; LD1W (float)
;
-define void @masked_gather_v2f32(<2 x float>* %a, <2 x float*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
- %cval = load <2 x float>, <2 x float>* %a
- %ptrs = load <2 x float*>, <2 x float*>* %b
+ %cval = load <2 x float>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = fcmp oeq <2 x float> %cval, zeroinitializer
- %vals = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ptrs, i32 8, <2 x i1> %mask, <2 x float> undef)
- store <2 x float> %vals, <2 x float>* %a
+ %vals = call <2 x float> @llvm.masked.gather.v2f32(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x float> undef)
+ store <2 x float> %vals, ptr %a
ret void
}
-define void @masked_gather_v4f32(<4 x float>* %a, <4 x float*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %cval = load <4 x float>, <4 x float>* %a
- %ptrs = load <4 x float*>, <4 x float*>* %b
+ %cval = load <4 x float>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = fcmp oeq <4 x float> %cval, zeroinitializer
- %vals = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %ptrs, i32 8, <4 x i1> %mask, <4 x float> undef)
- store <4 x float> %vals, <4 x float>* %a
+ %vals = call <4 x float> @llvm.masked.gather.v4f32(<4 x ptr> %ptrs, i32 8, <4 x i1> %mask, <4 x float> undef)
+ store <4 x float> %vals, ptr %a
ret void
}
-define void @masked_gather_v8f32(<8 x float>* %a, <8 x float*>* %b) #0 {
+define void @masked_gather_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z1.d]
; VBITS_GE_512-NEXT: st1w { z0.d }, p1, [x0]
; VBITS_GE_512-NEXT: ret
- %cval = load <8 x float>, <8 x float>* %a
- %ptrs = load <8 x float*>, <8 x float*>* %b
+ %cval = load <8 x float>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = fcmp oeq <8 x float> %cval, zeroinitializer
- %vals = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %ptrs, i32 8, <8 x i1> %mask, <8 x float> undef)
- store <8 x float> %vals, <8 x float>* %a
+ %vals = call <8 x float> @llvm.masked.gather.v8f32(<8 x ptr> %ptrs, i32 8, <8 x i1> %mask, <8 x float> undef)
+ store <8 x float> %vals, ptr %a
ret void
}
-define void @masked_gather_v16f32(<16 x float>* %a, <16 x float*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1w { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cval = load <16 x float>, <16 x float>* %a
- %ptrs = load <16 x float*>, <16 x float*>* %b
+ %cval = load <16 x float>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = fcmp oeq <16 x float> %cval, zeroinitializer
- %vals = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %ptrs, i32 8, <16 x i1> %mask, <16 x float> undef)
- store <16 x float> %vals, <16 x float>* %a
+ %vals = call <16 x float> @llvm.masked.gather.v16f32(<16 x ptr> %ptrs, i32 8, <16 x i1> %mask, <16 x float> undef)
+ store <16 x float> %vals, ptr %a
ret void
}
-define void @masked_gather_v32f32(<32 x float>* %a, <32 x float*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1w { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cval = load <32 x float>, <32 x float>* %a
- %ptrs = load <32 x float*>, <32 x float*>* %b
+ %cval = load <32 x float>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = fcmp oeq <32 x float> %cval, zeroinitializer
- %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
- store <32 x float> %vals, <32 x float>* %a
+ %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
+ store <32 x float> %vals, ptr %a
ret void
}
;
; Scalarize 1 x double gathers
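; As with v1i64 above, the v1f64 gather is expected to scalarize into branchy
; code (note the "%else" label below) instead of using an SVE gather.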
-define void @masked_gather_v1f64(<1 x double>* %a, <1 x double*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v1f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: .LBB31_2: // %else
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
- %cval = load <1 x double>, <1 x double>* %a
- %ptrs = load <1 x double*>, <1 x double*>* %b
+ %cval = load <1 x double>, ptr %a
+ %ptrs = load <1 x ptr>, ptr %b
%mask = fcmp oeq <1 x double> %cval, zeroinitializer
- %vals = call <1 x double> @llvm.masked.gather.v1f64(<1 x double*> %ptrs, i32 8, <1 x i1> %mask, <1 x double> undef)
- store <1 x double> %vals, <1 x double>* %a
+ %vals = call <1 x double> @llvm.masked.gather.v1f64(<1 x ptr> %ptrs, i32 8, <1 x i1> %mask, <1 x double> undef)
+ store <1 x double> %vals, ptr %a
ret void
}
-define void @masked_gather_v2f64(<2 x double>* %a, <2 x double*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %cval = load <2 x double>, <2 x double>* %a
- %ptrs = load <2 x double*>, <2 x double*>* %b
+ %cval = load <2 x double>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = fcmp oeq <2 x double> %cval, zeroinitializer
- %vals = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 8, <2 x i1> %mask, <2 x double> undef)
- store <2 x double> %vals, <2 x double>* %a
+ %vals = call <2 x double> @llvm.masked.gather.v2f64(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x double> undef)
+ store <2 x double> %vals, ptr %a
ret void
}
-define void @masked_gather_v4f64(<4 x double>* %a, <4 x double*>* %b) vscale_range(2,0) #0 {
+define void @masked_gather_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p1/z, [z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %cval = load <4 x double>, <4 x double>* %a
- %ptrs = load <4 x double*>, <4 x double*>* %b
+ %cval = load <4 x double>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = fcmp oeq <4 x double> %cval, zeroinitializer
- %vals = call <4 x double> @llvm.masked.gather.v4f64(<4 x double*> %ptrs, i32 8, <4 x i1> %mask, <4 x double> undef)
- store <4 x double> %vals, <4 x double>* %a
+ %vals = call <4 x double> @llvm.masked.gather.v4f64(<4 x ptr> %ptrs, i32 8, <4 x i1> %mask, <4 x double> undef)
+ store <4 x double> %vals, ptr %a
ret void
}
-define void @masked_gather_v8f64(<8 x double>* %a, <8 x double*>* %b) #0 {
+define void @masked_gather_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [z1.d]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %cval = load <8 x double>, <8 x double>* %a
- %ptrs = load <8 x double*>, <8 x double*>* %b
+ %cval = load <8 x double>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = fcmp oeq <8 x double> %cval, zeroinitializer
- %vals = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs, i32 8, <8 x i1> %mask, <8 x double> undef)
- store <8 x double> %vals, <8 x double>* %a
+ %vals = call <8 x double> @llvm.masked.gather.v8f64(<8 x ptr> %ptrs, i32 8, <8 x i1> %mask, <8 x double> undef)
+ store <8 x double> %vals, ptr %a
ret void
}
-define void @masked_gather_v16f64(<16 x double>* %a, <16 x double*>* %b) vscale_range(8,0) #0 {
+define void @masked_gather_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p1/z, [z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %cval = load <16 x double>, <16 x double>* %a
- %ptrs = load <16 x double*>, <16 x double*>* %b
+ %cval = load <16 x double>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = fcmp oeq <16 x double> %cval, zeroinitializer
- %vals = call <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32 8, <16 x i1> %mask, <16 x double> undef)
- store <16 x double> %vals, <16 x double>* %a
+ %vals = call <16 x double> @llvm.masked.gather.v16f64(<16 x ptr> %ptrs, i32 8, <16 x i1> %mask, <16 x double> undef)
+ store <16 x double> %vals, ptr %a
ret void
}
-define void @masked_gather_v32f64(<32 x double>* %a, <32 x double*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p1/z, [z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %cval = load <32 x double>, <32 x double>* %a
- %ptrs = load <32 x double*>, <32 x double*>* %b
+ %cval = load <32 x double>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = fcmp oeq <32 x double> %cval, zeroinitializer
- %vals = call <32 x double> @llvm.masked.gather.v32f64(<32 x double*> %ptrs, i32 8, <32 x i1> %mask, <32 x double> undef)
- store <32 x double> %vals, <32 x double>* %a
+ %vals = call <32 x double> @llvm.masked.gather.v32f64(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x double> undef)
+ store <32 x double> %vals, ptr %a
ret void
}
; The tests above exercise the element types; the tests below check that the
; addressing modes still function.
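; For example, in the 32-bit scaled sign-extended form each lane's address is
; effectively base + sext(idx) * sizeof(element), which is expected to fold
; into a single gather with an "sxtw #N" modifier, e.g. "[x2, z1.s, sxtw #1]"
; for f16, where #1 is log2 of the 2-byte element size.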
-define void @masked_gather_32b_scaled_sext_f16(<32 x half>* %a, <32 x i32>* %b, half* %base) vscale_range(8,0) #0 {
+define void @masked_gather_32b_scaled_sext_f16(ptr %a, ptr %b, ptr %base) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_32b_scaled_sext_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z1.s, sxtw #1]
; CHECK-NEXT: st1h { z0.s }, p1, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x half>, <32 x half>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %cvals = load <32 x half>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = sext <32 x i32> %idxs to <32 x i64>
- %ptrs = getelementptr half, half* %base, <32 x i64> %ext
+ %ptrs = getelementptr half, ptr %base, <32 x i64> %ext
%mask = fcmp oeq <32 x half> %cvals, zeroinitializer
- %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
- store <32 x half> %vals, <32 x half>* %a
+ %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
+ store <32 x half> %vals, ptr %a
ret void
}
-define void @masked_gather_32b_scaled_sext_f32(<32 x float>* %a, <32 x i32>* %b, float* %base) vscale_range(8,0) #0 {
+define void @masked_gather_32b_scaled_sext_f32(ptr %a, ptr %b, ptr %base) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_32b_scaled_sext_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x2, z1.s, sxtw #2]
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x float>, <32 x float>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %cvals = load <32 x float>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = sext <32 x i32> %idxs to <32 x i64>
- %ptrs = getelementptr float, float* %base, <32 x i64> %ext
+ %ptrs = getelementptr float, ptr %base, <32 x i64> %ext
%mask = fcmp oeq <32 x float> %cvals, zeroinitializer
- %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
- store <32 x float> %vals, <32 x float>* %a
+ %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
+ store <32 x float> %vals, ptr %a
ret void
}
-define void @masked_gather_32b_scaled_sext_f64(<32 x double>* %a, <32 x i32>* %b, double* %base) vscale_range(16,0) #0 {
+define void @masked_gather_32b_scaled_sext_f64(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_32b_scaled_sext_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x2, z1.d, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x double>, <32 x double>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %cvals = load <32 x double>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = sext <32 x i32> %idxs to <32 x i64>
- %ptrs = getelementptr double, double* %base, <32 x i64> %ext
+ %ptrs = getelementptr double, ptr %base, <32 x i64> %ext
%mask = fcmp oeq <32 x double> %cvals, zeroinitializer
- %vals = call <32 x double> @llvm.masked.gather.v32f64(<32 x double*> %ptrs, i32 8, <32 x i1> %mask, <32 x double> undef)
- store <32 x double> %vals, <32 x double>* %a
+ %vals = call <32 x double> @llvm.masked.gather.v32f64(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x double> undef)
+ store <32 x double> %vals, ptr %a
ret void
}
-define void @masked_gather_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, half* %base) vscale_range(8,0) #0 {
+define void @masked_gather_32b_scaled_zext(ptr %a, ptr %b, ptr %base) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_32b_scaled_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z1.s, uxtw #1]
; CHECK-NEXT: st1h { z0.s }, p1, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x half>, <32 x half>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %cvals = load <32 x half>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = zext <32 x i32> %idxs to <32 x i64>
- %ptrs = getelementptr half, half* %base, <32 x i64> %ext
+ %ptrs = getelementptr half, ptr %base, <32 x i64> %ext
%mask = fcmp oeq <32 x half> %cvals, zeroinitializer
- %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
- store <32 x half> %vals, <32 x half>* %a
+ %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
+ store <32 x half> %vals, ptr %a
ret void
}
-define void @masked_gather_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i8* %base) vscale_range(8,0) #0 {
+define void @masked_gather_32b_unscaled_sext(ptr %a, ptr %b, ptr %base) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_32b_unscaled_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z1.s, sxtw]
; CHECK-NEXT: st1h { z0.s }, p1, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x half>, <32 x half>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %cvals = load <32 x half>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = sext <32 x i32> %idxs to <32 x i64>
- %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %ext
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x half*>
+ %byte_ptrs = getelementptr i8, ptr %base, <32 x i64> %ext
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x half> %cvals, zeroinitializer
- %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
- store <32 x half> %vals, <32 x half>* %a
+ %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
+ store <32 x half> %vals, ptr %a
ret void
}
-define void @masked_gather_32b_unscaled_zext(<32 x half>* %a, <32 x i32>* %b, i8* %base) vscale_range(8,0) #0 {
+define void @masked_gather_32b_unscaled_zext(ptr %a, ptr %b, ptr %base) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_32b_unscaled_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z1.s, uxtw]
; CHECK-NEXT: st1h { z0.s }, p1, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x half>, <32 x half>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %cvals = load <32 x half>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = zext <32 x i32> %idxs to <32 x i64>
- %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %ext
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x half*>
+ %byte_ptrs = getelementptr i8, ptr %base, <32 x i64> %ext
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x half> %cvals, zeroinitializer
- %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
- store <32 x half> %vals, <32 x half>* %a
+ %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef)
+ store <32 x half> %vals, ptr %a
ret void
}
-define void @masked_gather_64b_scaled(<32 x float>* %a, <32 x i64>* %b, float* %base) vscale_range(16,0) #0 {
+define void @masked_gather_64b_scaled(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_64b_scaled:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x2, z1.d, lsl #2]
; CHECK-NEXT: st1w { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x float>, <32 x float>* %a
- %idxs = load <32 x i64>, <32 x i64>* %b
- %ptrs = getelementptr float, float* %base, <32 x i64> %idxs
+ %cvals = load <32 x float>, ptr %a
+ %idxs = load <32 x i64>, ptr %b
+ %ptrs = getelementptr float, ptr %base, <32 x i64> %idxs
%mask = fcmp oeq <32 x float> %cvals, zeroinitializer
- %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
- store <32 x float> %vals, <32 x float>* %a
+ %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
+ store <32 x float> %vals, ptr %a
ret void
}
-define void @masked_gather_64b_unscaled(<32 x float>* %a, <32 x i64>* %b, i8* %base) vscale_range(16,0) #0 {
+define void @masked_gather_64b_unscaled(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_64b_unscaled:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x2, z1.d]
; CHECK-NEXT: st1w { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x float>, <32 x float>* %a
- %idxs = load <32 x i64>, <32 x i64>* %b
- %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %idxs
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*>
+ %cvals = load <32 x float>, ptr %a
+ %idxs = load <32 x i64>, ptr %b
+ %byte_ptrs = getelementptr i8, ptr %base, <32 x i64> %idxs
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x float> %cvals, zeroinitializer
- %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
- store <32 x float> %vals, <32 x float>* %a
+ %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
+ store <32 x float> %vals, ptr %a
ret void
}
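; The next two tests swap the roles of the operands: a vector of bases plus a
; scalar offset, which is expected to select the register form "[x2, z1.d]"
; and, for a small constant offset, the immediate form "[z1.d, #4]".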
-define void @masked_gather_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %off) vscale_range(16,0) #0 {
+define void @masked_gather_vec_plus_reg(ptr %a, ptr %b, i64 %off) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_vec_plus_reg:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x2, z1.d]
; CHECK-NEXT: st1w { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x float>, <32 x float>* %a
- %bases = load <32 x i8*>, <32 x i8*>* %b
- %byte_ptrs = getelementptr i8, <32 x i8*> %bases, i64 %off
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*>
+ %cvals = load <32 x float>, ptr %a
+ %bases = load <32 x ptr>, ptr %b
+ %byte_ptrs = getelementptr i8, <32 x ptr> %bases, i64 %off
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x float> %cvals, zeroinitializer
- %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
- store <32 x float> %vals, <32 x float>* %a
+ %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
+ store <32 x float> %vals, ptr %a
ret void
}
-define void @masked_gather_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_vec_plus_imm(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_vec_plus_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d, #4]
; CHECK-NEXT: st1w { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x float>, <32 x float>* %a
- %bases = load <32 x i8*>, <32 x i8*>* %b
- %byte_ptrs = getelementptr i8, <32 x i8*> %bases, i64 4
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*>
+ %cvals = load <32 x float>, ptr %a
+ %bases = load <32 x ptr>, ptr %b
+ %byte_ptrs = getelementptr i8, <32 x ptr> %bases, i64 4
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x float> %cvals, zeroinitializer
- %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
- store <32 x float> %vals, <32 x float>* %a
+ %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef)
+ store <32 x float> %vals, ptr %a
ret void
}
-define void @masked_gather_passthru(<32 x float>* %a, <32 x float*>* %b, <32 x float>* %c) vscale_range(16,0) #0 {
+define void @masked_gather_passthru(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_passthru:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x float>, <32 x float>* %a
- %ptrs = load <32 x float*>, <32 x float*>* %b
- %passthru = load <32 x float>, <32 x float>* %c
+ %cvals = load <32 x float>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
+ %passthru = load <32 x float>, ptr %c
%mask = fcmp oeq <32 x float> %cvals, zeroinitializer
- %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> %passthru)
- store <32 x float> %vals, <32 x float>* %a
+ %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x float> %passthru)
+ store <32 x float> %vals, ptr %a
ret void
}
-define void @masked_gather_passthru_0(<32 x float>* %a, <32 x float*>* %b) vscale_range(16,0) #0 {
+define void @masked_gather_passthru_0(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_passthru_0:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d]
; CHECK-NEXT: st1w { z0.d }, p1, [x0]
; CHECK-NEXT: ret
- %cvals = load <32 x float>, <32 x float>* %a
- %ptrs = load <32 x float*>, <32 x float*>* %b
+ %cvals = load <32 x float>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = fcmp oeq <32 x float> %cvals, zeroinitializer
- %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> zeroinitializer)
- store <32 x float> %vals, <32 x float>* %a
+ %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x ptr> %ptrs, i32 8, <32 x i1> %mask, <32 x float> zeroinitializer)
+ store <32 x float> %vals, ptr %a
ret void
}
-declare <2 x i8> @llvm.masked.gather.v2i8(<2 x i8*>, i32, <2 x i1>, <2 x i8>)
-declare <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
-declare <8 x i8> @llvm.masked.gather.v8i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>)
-declare <16 x i8> @llvm.masked.gather.v16i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
-declare <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>)
-
-declare <2 x i16> @llvm.masked.gather.v2i16(<2 x i16*>, i32, <2 x i1>, <2 x i16>)
-declare <4 x i16> @llvm.masked.gather.v4i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
-declare <8 x i16> @llvm.masked.gather.v8i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>)
-declare <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*>, i32, <16 x i1>, <16 x i16>)
-declare <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*>, i32, <32 x i1>, <32 x i16>)
-
-declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
-declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
-declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
-declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
-declare <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*>, i32, <32 x i1>, <32 x i32>)
-
-declare <1 x i64> @llvm.masked.gather.v1i64(<1 x i64*>, i32, <1 x i1>, <1 x i64>)
-declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
-declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
-declare <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)
-declare <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*>, i32, <16 x i1>, <16 x i64>)
-declare <32 x i64> @llvm.masked.gather.v32i64(<32 x i64*>, i32, <32 x i1>, <32 x i64>)
-
-declare <2 x half> @llvm.masked.gather.v2f16(<2 x half*>, i32, <2 x i1>, <2 x half>)
-declare <4 x half> @llvm.masked.gather.v4f16(<4 x half*>, i32, <4 x i1>, <4 x half>)
-declare <8 x half> @llvm.masked.gather.v8f16(<8 x half*>, i32, <8 x i1>, <8 x half>)
-declare <16 x half> @llvm.masked.gather.v16f16(<16 x half*>, i32, <16 x i1>, <16 x half>)
-declare <32 x half> @llvm.masked.gather.v32f16(<32 x half*>, i32, <32 x i1>, <32 x half>)
-
-declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
-declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
-declare <8 x float> @llvm.masked.gather.v8f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
-declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
-declare <32 x float> @llvm.masked.gather.v32f32(<32 x float*>, i32, <32 x i1>, <32 x float>)
-
-declare <1 x double> @llvm.masked.gather.v1f64(<1 x double*>, i32, <1 x i1>, <1 x double>)
-declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
-declare <4 x double> @llvm.masked.gather.v4f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
-declare <8 x double> @llvm.masked.gather.v8f64(<8 x double*>, i32, <8 x i1>, <8 x double>)
-declare <16 x double> @llvm.masked.gather.v16f64(<16 x double*>, i32, <16 x i1>, <16 x double>)
-declare <32 x double> @llvm.masked.gather.v32f64(<32 x double*>, i32, <32 x i1>, <32 x double>)
+declare <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
+declare <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
+declare <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
+declare <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
+declare <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
+
+declare <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
+declare <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
+declare <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
+declare <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
+declare <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr>, i32, <32 x i1>, <32 x i16>)
+
+declare <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
+declare <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
+declare <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr>, i32, <32 x i1>, <32 x i32>)
+
+declare <1 x i64> @llvm.masked.gather.v1i64(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
+declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
+declare <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
+declare <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
+declare <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr>, i32, <16 x i1>, <16 x i64>)
+declare <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr>, i32, <32 x i1>, <32 x i64>)
+
+declare <2 x half> @llvm.masked.gather.v2f16(<2 x ptr>, i32, <2 x i1>, <2 x half>)
+declare <4 x half> @llvm.masked.gather.v4f16(<4 x ptr>, i32, <4 x i1>, <4 x half>)
+declare <8 x half> @llvm.masked.gather.v8f16(<8 x ptr>, i32, <8 x i1>, <8 x half>)
+declare <16 x half> @llvm.masked.gather.v16f16(<16 x ptr>, i32, <16 x i1>, <16 x half>)
+declare <32 x half> @llvm.masked.gather.v32f16(<32 x ptr>, i32, <32 x i1>, <32 x half>)
+
+declare <2 x float> @llvm.masked.gather.v2f32(<2 x ptr>, i32, <2 x i1>, <2 x float>)
+declare <4 x float> @llvm.masked.gather.v4f32(<4 x ptr>, i32, <4 x i1>, <4 x float>)
+declare <8 x float> @llvm.masked.gather.v8f32(<8 x ptr>, i32, <8 x i1>, <8 x float>)
+declare <16 x float> @llvm.masked.gather.v16f32(<16 x ptr>, i32, <16 x i1>, <16 x float>)
+declare <32 x float> @llvm.masked.gather.v32f32(<32 x ptr>, i32, <32 x i1>, <32 x float>)
+
+declare <1 x double> @llvm.masked.gather.v1f64(<1 x ptr>, i32, <1 x i1>, <1 x double>)
+declare <2 x double> @llvm.masked.gather.v2f64(<2 x ptr>, i32, <2 x i1>, <2 x double>)
+declare <4 x double> @llvm.masked.gather.v4f64(<4 x ptr>, i32, <4 x i1>, <4 x double>)
+declare <8 x double> @llvm.masked.gather.v8f64(<8 x ptr>, i32, <8 x i1>, <8 x double>)
+declare <16 x double> @llvm.masked.gather.v16f64(<16 x ptr>, i32, <16 x i1>, <16 x double>)
+declare <32 x double> @llvm.masked.gather.v32f64(<32 x ptr>, i32, <32 x i1>, <32 x double>)
attributes #0 = { "target-features"="+sve" }
; Masked Loads
;
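; Here the mask comes from comparing two loaded vectors, and the passthru is
; zeroinitializer, so each test is expected to lower to a zeroing predicated
; load (e.g. "ld1w { z0.s }, p1/z, [x0]") with no separate select needed.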
-define <2 x half> @masked_load_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_range(2,0) #0 {
+define <2 x half> @masked_load_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_load_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %a = load <2 x half>, <2 x half>* %ap
- %b = load <2 x half>, <2 x half>* %bp
+ %a = load <2 x half>, ptr %ap
+ %b = load <2 x half>, ptr %bp
%mask = fcmp oeq <2 x half> %a, %b
- %load = call <2 x half> @llvm.masked.load.v2f16(<2 x half>* %ap, i32 8, <2 x i1> %mask, <2 x half> zeroinitializer)
+ %load = call <2 x half> @llvm.masked.load.v2f16(ptr %ap, i32 8, <2 x i1> %mask, <2 x half> zeroinitializer)
ret <2 x half> %load
}
-define <2 x float> @masked_load_v2f32(<2 x float>* %ap, <2 x float>* %bp) vscale_range(2,0) #0 {
+define <2 x float> @masked_load_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_load_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %a = load <2 x float>, <2 x float>* %ap
- %b = load <2 x float>, <2 x float>* %bp
+ %a = load <2 x float>, ptr %ap
+ %b = load <2 x float>, ptr %bp
%mask = fcmp oeq <2 x float> %a, %b
- %load = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %ap, i32 8, <2 x i1> %mask, <2 x float> zeroinitializer)
+ %load = call <2 x float> @llvm.masked.load.v2f32(ptr %ap, i32 8, <2 x i1> %mask, <2 x float> zeroinitializer)
ret <2 x float> %load
}
-define <4 x float> @masked_load_v4f32(<4 x float>* %ap, <4 x float>* %bp) vscale_range(2,0) #0 {
+define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_load_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %a = load <4 x float>, <4 x float>* %ap
- %b = load <4 x float>, <4 x float>* %bp
+ %a = load <4 x float>, ptr %ap
+ %b = load <4 x float>, ptr %bp
%mask = fcmp oeq <4 x float> %a, %b
- %load = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %ap, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer)
+ %load = call <4 x float> @llvm.masked.load.v4f32(ptr %ap, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer)
ret <4 x float> %load
}
-define <8 x float> @masked_load_v8f32(<8 x float>* %ap, <8 x float>* %bp) vscale_range(2,0) #0 {
+define <8 x float> @masked_load_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_load_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %a = load <8 x float>, <8 x float>* %ap
- %b = load <8 x float>, <8 x float>* %bp
+ %a = load <8 x float>, ptr %ap
+ %b = load <8 x float>, ptr %bp
%mask = fcmp oeq <8 x float> %a, %b
- %load = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %ap, i32 8, <8 x i1> %mask, <8 x float> zeroinitializer)
+ %load = call <8 x float> @llvm.masked.load.v8f32(ptr %ap, i32 8, <8 x i1> %mask, <8 x float> zeroinitializer)
ret <8 x float> %load
}
-define <16 x float> @masked_load_v16f32(<16 x float>* %ap, <16 x float>* %bp) #0 {
+define <16 x float> @masked_load_v16f32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x float>, <16 x float>* %ap
- %b = load <16 x float>, <16 x float>* %bp
+ %a = load <16 x float>, ptr %ap
+ %b = load <16 x float>, ptr %bp
%mask = fcmp oeq <16 x float> %a, %b
- %load = call <16 x float> @llvm.masked.load.v16f32(<16 x float>* %ap, i32 8, <16 x i1> %mask, <16 x float> zeroinitializer)
+ %load = call <16 x float> @llvm.masked.load.v16f32(ptr %ap, i32 8, <16 x i1> %mask, <16 x float> zeroinitializer)
ret <16 x float> %load
}
-define <32 x float> @masked_load_v32f32(<32 x float>* %ap, <32 x float>* %bp) vscale_range(8,0) #0 {
+define <32 x float> @masked_load_v32f32(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_load_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %a = load <32 x float>, <32 x float>* %ap
- %b = load <32 x float>, <32 x float>* %bp
+ %a = load <32 x float>, ptr %ap
+ %b = load <32 x float>, ptr %bp
%mask = fcmp oeq <32 x float> %a, %b
- %load = call <32 x float> @llvm.masked.load.v32f32(<32 x float>* %ap, i32 8, <32 x i1> %mask, <32 x float> zeroinitializer)
+ %load = call <32 x float> @llvm.masked.load.v32f32(ptr %ap, i32 8, <32 x i1> %mask, <32 x float> zeroinitializer)
ret <32 x float> %load
}
-define <64 x float> @masked_load_v64f32(<64 x float>* %ap, <64 x float>* %bp) vscale_range(16,0) #0 {
+define <64 x float> @masked_load_v64f32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %a = load <64 x float>, <64 x float>* %ap
- %b = load <64 x float>, <64 x float>* %bp
+ %a = load <64 x float>, ptr %ap
+ %b = load <64 x float>, ptr %bp
%mask = fcmp oeq <64 x float> %a, %b
- %load = call <64 x float> @llvm.masked.load.v64f32(<64 x float>* %ap, i32 8, <64 x i1> %mask, <64 x float> zeroinitializer)
+ %load = call <64 x float> @llvm.masked.load.v64f32(ptr %ap, i32 8, <64 x i1> %mask, <64 x float> zeroinitializer)
ret <64 x float> %load
}
-define <64 x i8> @masked_load_v64i8(<64 x i8>* %ap, <64 x i8>* %bp) #0 {
+define <64 x i8> @masked_load_v64i8(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w9, #32
; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %a = load <64 x i8>, <64 x i8>* %ap
- %b = load <64 x i8>, <64 x i8>* %bp
+ %a = load <64 x i8>, ptr %ap
+ %b = load <64 x i8>, ptr %bp
%mask = icmp eq <64 x i8> %a, %b
- %load = call <64 x i8> @llvm.masked.load.v64i8(<64 x i8>* %ap, i32 8, <64 x i1> %mask, <64 x i8> undef)
+ %load = call <64 x i8> @llvm.masked.load.v64i8(ptr %ap, i32 8, <64 x i1> %mask, <64 x i8> undef)
ret <64 x i8> %load
}
-define <32 x i16> @masked_load_v32i16(<32 x i16>* %ap, <32 x i16>* %bp) #0 {
+define <32 x i16> @masked_load_v32i16(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #16
; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %ap
- %b = load <32 x i16>, <32 x i16>* %bp
+ %a = load <32 x i16>, ptr %ap
+ %b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %a, %b
- %load = call <32 x i16> @llvm.masked.load.v32i16(<32 x i16>* %ap, i32 8, <32 x i1> %mask, <32 x i16> undef)
+ %load = call <32 x i16> @llvm.masked.load.v32i16(ptr %ap, i32 8, <32 x i1> %mask, <32 x i16> undef)
ret <32 x i16> %load
}
-define <16 x i32> @masked_load_v16i32(<16 x i32>* %ap, <16 x i32>* %bp) #0 {
+define <16 x i32> @masked_load_v16i32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %ap
- %b = load <16 x i32>, <16 x i32>* %bp
+ %a = load <16 x i32>, ptr %ap
+ %b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %a, %b
- %load = call <16 x i32> @llvm.masked.load.v16i32(<16 x i32>* %ap, i32 8, <16 x i1> %mask, <16 x i32> undef)
+ %load = call <16 x i32> @llvm.masked.load.v16i32(ptr %ap, i32 8, <16 x i1> %mask, <16 x i32> undef)
ret <16 x i32> %load
}
-define <8 x i64> @masked_load_v8i64(<8 x i64>* %ap, <8 x i64>* %bp) #0 {
+define <8 x i64> @masked_load_v8i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %ap
- %b = load <8 x i64>, <8 x i64>* %bp
+ %a = load <8 x i64>, ptr %ap
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
- %load = call <8 x i64> @llvm.masked.load.v8i64(<8 x i64>* %ap, i32 8, <8 x i1> %mask, <8 x i64> undef)
+ %load = call <8 x i64> @llvm.masked.load.v8i64(ptr %ap, i32 8, <8 x i1> %mask, <8 x i64> undef)
ret <8 x i64> %load
}
-define <8 x i64> @masked_load_passthru_v8i64(<8 x i64>* %ap, <8 x i64>* %bp) #0 {
+define <8 x i64> @masked_load_passthru_v8i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_passthru_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %ap
- %b = load <8 x i64>, <8 x i64>* %bp
+ %a = load <8 x i64>, ptr %ap
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
- %load = call <8 x i64> @llvm.masked.load.v8i64(<8 x i64>* %ap, i32 8, <8 x i1> %mask, <8 x i64> %b)
+ %load = call <8 x i64> @llvm.masked.load.v8i64(ptr %ap, i32 8, <8 x i1> %mask, <8 x i64> %b)
ret <8 x i64> %load
}
-define <8 x double> @masked_load_passthru_v8f64(<8 x double>* %ap, <8 x double>* %bp) #0 {
+define <8 x double> @masked_load_passthru_v8f64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_passthru_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x double>, <8 x double>* %ap
- %b = load <8 x double>, <8 x double>* %bp
+ %a = load <8 x double>, ptr %ap
+ %b = load <8 x double>, ptr %bp
%mask = fcmp oeq <8 x double> %a, %b
- %load = call <8 x double> @llvm.masked.load.v8f64(<8 x double>* %ap, i32 8, <8 x i1> %mask, <8 x double> %b)
+ %load = call <8 x double> @llvm.masked.load.v8f64(ptr %ap, i32 8, <8 x i1> %mask, <8 x double> %b)
ret <8 x double> %load
}
-define <32 x i16> @masked_load_sext_v32i8i16(<32 x i8>* %ap, <32 x i8>* %bp) #0 {
+define <32 x i16> @masked_load_sext_v32i8i16(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v32i8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_512-NEXT: ld1sb { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <32 x i8>, <32 x i8>* %bp
+ %b = load <32 x i8>, ptr %bp
%mask = icmp eq <32 x i8> %b, zeroinitializer
- %load = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
+ %load = call <32 x i8> @llvm.masked.load.v32i8(ptr %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
%ext = sext <32 x i8> %load to <32 x i16>
ret <32 x i16> %ext
}
-define <16 x i32> @masked_load_sext_v16i8i32(<16 x i8>* %ap, <16 x i8>* %bp) #0 {
+define <16 x i32> @masked_load_sext_v16i8i32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v16i8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_512-NEXT: ld1sb { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <16 x i8>, <16 x i8>* %bp
+ %b = load <16 x i8>, ptr %bp
%mask = icmp eq <16 x i8> %b, zeroinitializer
- %load = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %ap, i32 8, <16 x i1> %mask, <16 x i8> undef)
+ %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %ap, i32 8, <16 x i1> %mask, <16 x i8> undef)
%ext = sext <16 x i8> %load to <16 x i32>
ret <16 x i32> %ext
}
-define <8 x i64> @masked_load_sext_v8i8i64(<8 x i8>* %ap, <8 x i8>* %bp) #0 {
+define <8 x i64> @masked_load_sext_v8i8i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr d0, [x1]
; VBITS_GE_512-NEXT: ld1sb { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i8>, <8 x i8>* %bp
+ %b = load <8 x i8>, ptr %bp
%mask = icmp eq <8 x i8> %b, zeroinitializer
- %load = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %ap, i32 8, <8 x i1> %mask, <8 x i8> undef)
+ %load = call <8 x i8> @llvm.masked.load.v8i8(ptr %ap, i32 8, <8 x i1> %mask, <8 x i8> undef)
%ext = sext <8 x i8> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <16 x i32> @masked_load_sext_v16i16i32(<16 x i16>* %ap, <16 x i16>* %bp) #0 {
+define <16 x i32> @masked_load_sext_v16i16i32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v16i16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: ld1sh { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <16 x i16>, <16 x i16>* %bp
+ %b = load <16 x i16>, ptr %bp
%mask = icmp eq <16 x i16> %b, zeroinitializer
- %load = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %ap, i32 8, <16 x i1> %mask, <16 x i16> undef)
+ %load = call <16 x i16> @llvm.masked.load.v16i16(ptr %ap, i32 8, <16 x i1> %mask, <16 x i16> undef)
%ext = sext <16 x i16> %load to <16 x i32>
ret <16 x i32> %ext
}
-define <8 x i64> @masked_load_sext_v8i16i64(<8 x i16>* %ap, <8 x i16>* %bp) #0 {
+define <8 x i64> @masked_load_sext_v8i16i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i16i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_512-NEXT: ld1sh { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i16>, <8 x i16>* %bp
+ %b = load <8 x i16>, ptr %bp
%mask = icmp eq <8 x i16> %b, zeroinitializer
- %load = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %ap, i32 8, <8 x i1> %mask, <8 x i16> undef)
+ %load = call <8 x i16> @llvm.masked.load.v8i16(ptr %ap, i32 8, <8 x i1> %mask, <8 x i16> undef)
%ext = sext <8 x i16> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <8 x i64> @masked_load_sext_v8i32i64(<8 x i32>* %ap, <8 x i32>* %bp) #0 {
+define <8 x i64> @masked_load_sext_v8i32i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i32i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: ld1sw { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i32>, <8 x i32>* %bp
+ %b = load <8 x i32>, ptr %bp
%mask = icmp eq <8 x i32> %b, zeroinitializer
- %load = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
+ %load = call <8 x i32> @llvm.masked.load.v8i32(ptr %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
%ext = sext <8 x i32> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <32 x i16> @masked_load_zext_v32i8i16(<32 x i8>* %ap, <32 x i8>* %bp) #0 {
+define <32 x i16> @masked_load_zext_v32i8i16(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v32i8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_512-NEXT: ld1b { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <32 x i8>, <32 x i8>* %bp
+ %b = load <32 x i8>, ptr %bp
%mask = icmp eq <32 x i8> %b, zeroinitializer
- %load = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
+ %load = call <32 x i8> @llvm.masked.load.v32i8(ptr %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
%ext = zext <32 x i8> %load to <32 x i16>
ret <32 x i16> %ext
}
-define <16 x i32> @masked_load_zext_v16i8i32(<16 x i8>* %ap, <16 x i8>* %bp) #0 {
+define <16 x i32> @masked_load_zext_v16i8i32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v16i8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_512-NEXT: ld1b { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <16 x i8>, <16 x i8>* %bp
+ %b = load <16 x i8>, ptr %bp
%mask = icmp eq <16 x i8> %b, zeroinitializer
- %load = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %ap, i32 8, <16 x i1> %mask, <16 x i8> undef)
+ %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %ap, i32 8, <16 x i1> %mask, <16 x i8> undef)
%ext = zext <16 x i8> %load to <16 x i32>
ret <16 x i32> %ext
}
-define <8 x i64> @masked_load_zext_v8i8i64(<8 x i8>* %ap, <8 x i8>* %bp) #0 {
+define <8 x i64> @masked_load_zext_v8i8i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr d0, [x1]
; VBITS_GE_512-NEXT: ld1b { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i8>, <8 x i8>* %bp
+ %b = load <8 x i8>, ptr %bp
%mask = icmp eq <8 x i8> %b, zeroinitializer
- %load = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %ap, i32 8, <8 x i1> %mask, <8 x i8> undef)
+ %load = call <8 x i8> @llvm.masked.load.v8i8(ptr %ap, i32 8, <8 x i1> %mask, <8 x i8> undef)
%ext = zext <8 x i8> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <16 x i32> @masked_load_zext_v16i16i32(<16 x i16>* %ap, <16 x i16>* %bp) #0 {
+define <16 x i32> @masked_load_zext_v16i16i32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v16i16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: ld1h { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <16 x i16>, <16 x i16>* %bp
+ %b = load <16 x i16>, ptr %bp
%mask = icmp eq <16 x i16> %b, zeroinitializer
- %load = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %ap, i32 8, <16 x i1> %mask, <16 x i16> undef)
+ %load = call <16 x i16> @llvm.masked.load.v16i16(ptr %ap, i32 8, <16 x i1> %mask, <16 x i16> undef)
%ext = zext <16 x i16> %load to <16 x i32>
ret <16 x i32> %ext
}
-define <8 x i64> @masked_load_zext_v8i16i64(<8 x i16>* %ap, <8 x i16>* %bp) #0 {
+define <8 x i64> @masked_load_zext_v8i16i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i16i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_512-NEXT: ld1h { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i16>, <8 x i16>* %bp
+ %b = load <8 x i16>, ptr %bp
%mask = icmp eq <8 x i16> %b, zeroinitializer
- %load = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %ap, i32 8, <8 x i1> %mask, <8 x i16> undef)
+ %load = call <8 x i16> @llvm.masked.load.v8i16(ptr %ap, i32 8, <8 x i1> %mask, <8 x i16> undef)
%ext = zext <8 x i16> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <8 x i64> @masked_load_zext_v8i32i64(<8 x i32>* %ap, <8 x i32>* %bp) #0 {
+define <8 x i64> @masked_load_zext_v8i32i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i32i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i32>, <8 x i32>* %bp
+ %b = load <8 x i32>, ptr %bp
%mask = icmp eq <8 x i32> %b, zeroinitializer
- %load = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
+ %load = call <8 x i32> @llvm.masked.load.v8i32(ptr %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
%ext = zext <8 x i32> %load to <8 x i64>
ret <8 x i64> %ext
}
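; The _mNN variants below derive the mask from a compare at the extended
; element width (e.g. a <32 x i16> compare guarding a <32 x i8> load), so the
; predicate must be narrowed to the load's element size before the load.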
-define <32 x i16> @masked_load_sext_v32i8i16_m16(<32 x i8>* %ap, <32 x i16>* %bp) #0 {
+define <32 x i16> @masked_load_sext_v32i8i16_m16(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v32i8i16_m16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #16
; VBITS_GE_512-NEXT: ld1sb { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <32 x i16>, <32 x i16>* %bp
+ %b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %b, zeroinitializer
- %load = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
+ %load = call <32 x i8> @llvm.masked.load.v32i8(ptr %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
%ext = sext <32 x i8> %load to <32 x i16>
ret <32 x i16> %ext
}
-define <16 x i32> @masked_load_sext_v16i8i32_m32(<16 x i8>* %ap, <16 x i32>* %bp) #0 {
+define <16 x i32> @masked_load_sext_v16i8i32_m32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v16i8i32_m32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
; VBITS_GE_512-NEXT: ld1sb { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <16 x i32>, <16 x i32>* %bp
+ %b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %b, zeroinitializer
- %load = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %ap, i32 8, <16 x i1> %mask, <16 x i8> undef)
+ %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %ap, i32 8, <16 x i1> %mask, <16 x i8> undef)
%ext = sext <16 x i8> %load to <16 x i32>
ret <16 x i32> %ext
}
-define <8 x i64> @masked_load_sext_v8i8i64_m64(<8 x i8>* %ap, <8 x i64>* %bp) #0 {
+define <8 x i64> @masked_load_sext_v8i8i64_m64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i8i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: ld1sb { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i64>, <8 x i64>* %bp
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
- %load = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %ap, i32 8, <8 x i1> %mask, <8 x i8> undef)
+ %load = call <8 x i8> @llvm.masked.load.v8i8(ptr %ap, i32 8, <8 x i1> %mask, <8 x i8> undef)
%ext = sext <8 x i8> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <16 x i32> @masked_load_sext_v16i16i32_m32(<16 x i16>* %ap, <16 x i32>* %bp) #0 {
+define <16 x i32> @masked_load_sext_v16i16i32_m32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v16i16i32_m32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
; VBITS_GE_512-NEXT: ld1sh { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <16 x i32>, <16 x i32>* %bp
+ %b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %b, zeroinitializer
- %load = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %ap, i32 8, <16 x i1> %mask, <16 x i16> undef)
+ %load = call <16 x i16> @llvm.masked.load.v16i16(ptr %ap, i32 8, <16 x i1> %mask, <16 x i16> undef)
%ext = sext <16 x i16> %load to <16 x i32>
ret <16 x i32> %ext
}
-define <8 x i64> @masked_load_sext_v8i16i64_m64(<8 x i16>* %ap, <8 x i64>* %bp) #0 {
+define <8 x i64> @masked_load_sext_v8i16i64_m64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i16i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: ld1sh { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i64>, <8 x i64>* %bp
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
- %load = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %ap, i32 8, <8 x i1> %mask, <8 x i16> undef)
+ %load = call <8 x i16> @llvm.masked.load.v8i16(ptr %ap, i32 8, <8 x i1> %mask, <8 x i16> undef)
%ext = sext <8 x i16> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <8 x i64> @masked_load_sext_v8i32i64_m64(<8 x i32>* %ap, <8 x i64>* %bp) #0 {
+define <8 x i64> @masked_load_sext_v8i32i64_m64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i32i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: ld1sw { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i64>, <8 x i64>* %bp
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
- %load = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
+ %load = call <8 x i32> @llvm.masked.load.v8i32(ptr %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
%ext = sext <8 x i32> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <32 x i16> @masked_load_zext_v32i8i16_m16(<32 x i8>* %ap, <32 x i16>* %bp) #0 {
+define <32 x i16> @masked_load_zext_v32i8i16_m16(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v32i8i16_m16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #16
; VBITS_GE_512-NEXT: ld1b { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <32 x i16>, <32 x i16>* %bp
+ %b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %b, zeroinitializer
- %load = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
+ %load = call <32 x i8> @llvm.masked.load.v32i8(ptr %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
%ext = zext <32 x i8> %load to <32 x i16>
ret <32 x i16> %ext
}
-define <16 x i32> @masked_load_zext_v16i8i32_m32(<16 x i8>* %ap, <16 x i32>* %bp) #0 {
+define <16 x i32> @masked_load_zext_v16i8i32_m32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v16i8i32_m32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
; VBITS_GE_512-NEXT: ld1b { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <16 x i32>, <16 x i32>* %bp
+ %b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %b, zeroinitializer
- %load = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %ap, i32 8, <16 x i1> %mask, <16 x i8> undef)
+ %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %ap, i32 8, <16 x i1> %mask, <16 x i8> undef)
%ext = zext <16 x i8> %load to <16 x i32>
ret <16 x i32> %ext
}
-define <8 x i64> @masked_load_zext_v8i8i64_m64(<8 x i8>* %ap, <8 x i64>* %bp) #0 {
+define <8 x i64> @masked_load_zext_v8i8i64_m64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i8i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: ld1b { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i64>, <8 x i64>* %bp
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
- %load = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %ap, i32 8, <8 x i1> %mask, <8 x i8> undef)
+ %load = call <8 x i8> @llvm.masked.load.v8i8(ptr %ap, i32 8, <8 x i1> %mask, <8 x i8> undef)
%ext = zext <8 x i8> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <16 x i32> @masked_load_zext_v16i16i32_m32(<16 x i16>* %ap, <16 x i32>* %bp) #0 {
+define <16 x i32> @masked_load_zext_v16i16i32_m32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v16i16i32_m32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
; VBITS_GE_512-NEXT: ld1h { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <16 x i32>, <16 x i32>* %bp
+ %b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %b, zeroinitializer
- %load = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %ap, i32 8, <16 x i1> %mask, <16 x i16> undef)
+ %load = call <16 x i16> @llvm.masked.load.v16i16(ptr %ap, i32 8, <16 x i1> %mask, <16 x i16> undef)
%ext = zext <16 x i16> %load to <16 x i32>
ret <16 x i32> %ext
}
-define <8 x i64> @masked_load_zext_v8i16i64_m64(<8 x i16>* %ap, <8 x i64>* %bp) #0 {
+define <8 x i64> @masked_load_zext_v8i16i64_m64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i16i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: ld1h { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i64>, <8 x i64>* %bp
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
- %load = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %ap, i32 8, <8 x i1> %mask, <8 x i16> undef)
+ %load = call <8 x i16> @llvm.masked.load.v8i16(ptr %ap, i32 8, <8 x i1> %mask, <8 x i16> undef)
%ext = zext <8 x i16> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <8 x i64> @masked_load_zext_v8i32i64_m64(<8 x i32>* %ap, <8 x i64>* %bp) #0 {
+define <8 x i64> @masked_load_zext_v8i32i64_m64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i32i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #4
; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i64>, <8 x i64>* %bp
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
- %load = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
+ %load = call <8 x i32> @llvm.masked.load.v8i32(ptr %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
%ext = zext <8 x i32> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <128 x i16> @masked_load_sext_v128i8i16(<128 x i8>* %ap, <128 x i8>* %bp) vscale_range(16,0) #0 {
+define <128 x i16> @masked_load_sext_v128i8i16(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v128i8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <128 x i8>, <128 x i8>* %bp
+ %b = load <128 x i8>, ptr %bp
%mask = icmp eq <128 x i8> %b, zeroinitializer
- %load = call <128 x i8> @llvm.masked.load.v128i8(<128 x i8>* %ap, i32 8, <128 x i1> %mask, <128 x i8> undef)
+ %load = call <128 x i8> @llvm.masked.load.v128i8(ptr %ap, i32 8, <128 x i1> %mask, <128 x i8> undef)
%ext = sext <128 x i8> %load to <128 x i16>
ret <128 x i16> %ext
}
-define <64 x i32> @masked_load_sext_v64i8i32(<64 x i8>* %ap, <64 x i8>* %bp) vscale_range(16,0) #0 {
+define <64 x i32> @masked_load_sext_v64i8i32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v64i8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1sb { z0.s }, p1/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <64 x i8>, <64 x i8>* %bp
+ %b = load <64 x i8>, ptr %bp
%mask = icmp eq <64 x i8> %b, zeroinitializer
- %load = call <64 x i8> @llvm.masked.load.v64i8(<64 x i8>* %ap, i32 8, <64 x i1> %mask, <64 x i8> undef)
+ %load = call <64 x i8> @llvm.masked.load.v64i8(ptr %ap, i32 8, <64 x i1> %mask, <64 x i8> undef)
%ext = sext <64 x i8> %load to <64 x i32>
ret <64 x i32> %ext
}
-define <32 x i64> @masked_load_sext_v32i8i64(<32 x i8>* %ap, <32 x i8>* %bp) vscale_range(16,0) #0 {
+define <32 x i64> @masked_load_sext_v32i8i64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v32i8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1sb { z0.d }, p1/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <32 x i8>, <32 x i8>* %bp
+ %b = load <32 x i8>, ptr %bp
%mask = icmp eq <32 x i8> %b, zeroinitializer
- %load = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
+ %load = call <32 x i8> @llvm.masked.load.v32i8(ptr %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
%ext = sext <32 x i8> %load to <32 x i64>
ret <32 x i64> %ext
}
-define <64 x i32> @masked_load_sext_v64i16i32(<64 x i16>* %ap, <64 x i16>* %bp) vscale_range(16,0) #0 {
+define <64 x i32> @masked_load_sext_v64i16i32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v64i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <64 x i16>, <64 x i16>* %bp
+ %b = load <64 x i16>, ptr %bp
%mask = icmp eq <64 x i16> %b, zeroinitializer
- %load = call <64 x i16> @llvm.masked.load.v64i16(<64 x i16>* %ap, i32 8, <64 x i1> %mask, <64 x i16> undef)
+ %load = call <64 x i16> @llvm.masked.load.v64i16(ptr %ap, i32 8, <64 x i1> %mask, <64 x i16> undef)
%ext = sext <64 x i16> %load to <64 x i32>
ret <64 x i32> %ext
}
-define <32 x i64> @masked_load_sext_v32i16i64(<32 x i16>* %ap, <32 x i16>* %bp) vscale_range(16,0) #0 {
+define <32 x i64> @masked_load_sext_v32i16i64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v32i16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1sh { z0.d }, p1/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <32 x i16>, <32 x i16>* %bp
+ %b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %b, zeroinitializer
- %load = call <32 x i16> @llvm.masked.load.v32i16(<32 x i16>* %ap, i32 8, <32 x i1> %mask, <32 x i16> undef)
+ %load = call <32 x i16> @llvm.masked.load.v32i16(ptr %ap, i32 8, <32 x i1> %mask, <32 x i16> undef)
%ext = sext <32 x i16> %load to <32 x i64>
ret <32 x i64> %ext
}
-define <32 x i64> @masked_load_sext_v32i32i64(<32 x i32>* %ap, <32 x i32>* %bp) vscale_range(16,0) #0 {
+define <32 x i64> @masked_load_sext_v32i32i64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v32i32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1sw { z0.d }, p1/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <32 x i32>, <32 x i32>* %bp
+ %b = load <32 x i32>, ptr %bp
%mask = icmp eq <32 x i32> %b, zeroinitializer
- %load = call <32 x i32> @llvm.masked.load.v32i32(<32 x i32>* %ap, i32 8, <32 x i1> %mask, <32 x i32> undef)
+ %load = call <32 x i32> @llvm.masked.load.v32i32(ptr %ap, i32 8, <32 x i1> %mask, <32 x i32> undef)
%ext = sext <32 x i32> %load to <32 x i64>
ret <32 x i64> %ext
}
-define <128 x i16> @masked_load_zext_v128i8i16(<128 x i8>* %ap, <128 x i8>* %bp) vscale_range(16,0) #0 {
+define <128 x i16> @masked_load_zext_v128i8i16(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v128i8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1b { z0.h }, p1/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <128 x i8>, <128 x i8>* %bp
+ %b = load <128 x i8>, ptr %bp
%mask = icmp eq <128 x i8> %b, zeroinitializer
- %load = call <128 x i8> @llvm.masked.load.v128i8(<128 x i8>* %ap, i32 8, <128 x i1> %mask, <128 x i8> undef)
+ %load = call <128 x i8> @llvm.masked.load.v128i8(ptr %ap, i32 8, <128 x i1> %mask, <128 x i8> undef)
%ext = zext <128 x i8> %load to <128 x i16>
ret <128 x i16> %ext
}
-define <64 x i32> @masked_load_zext_v64i8i32(<64 x i8>* %ap, <64 x i8>* %bp) vscale_range(16,0) #0 {
+define <64 x i32> @masked_load_zext_v64i8i32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v64i8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1b { z0.s }, p1/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <64 x i8>, <64 x i8>* %bp
+ %b = load <64 x i8>, ptr %bp
%mask = icmp eq <64 x i8> %b, zeroinitializer
- %load = call <64 x i8> @llvm.masked.load.v64i8(<64 x i8>* %ap, i32 8, <64 x i1> %mask, <64 x i8> undef)
+ %load = call <64 x i8> @llvm.masked.load.v64i8(ptr %ap, i32 8, <64 x i1> %mask, <64 x i8> undef)
%ext = zext <64 x i8> %load to <64 x i32>
ret <64 x i32> %ext
}
-define <32 x i64> @masked_load_zext_v32i8i64(<32 x i8>* %ap, <32 x i8>* %bp) vscale_range(16,0) #0 {
+define <32 x i64> @masked_load_zext_v32i8i64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v32i8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1b { z0.d }, p1/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <32 x i8>, <32 x i8>* %bp
+ %b = load <32 x i8>, ptr %bp
%mask = icmp eq <32 x i8> %b, zeroinitializer
- %load = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
+ %load = call <32 x i8> @llvm.masked.load.v32i8(ptr %ap, i32 8, <32 x i1> %mask, <32 x i8> undef)
%ext = zext <32 x i8> %load to <32 x i64>
ret <32 x i64> %ext
}
-define <64 x i32> @masked_load_zext_v64i16i32(<64 x i16>* %ap, <64 x i16>* %bp) vscale_range(16,0) #0 {
+define <64 x i32> @masked_load_zext_v64i16i32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v64i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1h { z0.s }, p1/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <64 x i16>, <64 x i16>* %bp
+ %b = load <64 x i16>, ptr %bp
%mask = icmp eq <64 x i16> %b, zeroinitializer
- %load = call <64 x i16> @llvm.masked.load.v64i16(<64 x i16>* %ap, i32 8, <64 x i1> %mask, <64 x i16> undef)
+ %load = call <64 x i16> @llvm.masked.load.v64i16(ptr %ap, i32 8, <64 x i1> %mask, <64 x i16> undef)
%ext = zext <64 x i16> %load to <64 x i32>
ret <64 x i32> %ext
}
-define <32 x i64> @masked_load_zext_v32i16i64(<32 x i16>* %ap, <32 x i16>* %bp) vscale_range(16,0) #0 {
+define <32 x i64> @masked_load_zext_v32i16i64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v32i16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1h { z0.d }, p1/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <32 x i16>, <32 x i16>* %bp
+ %b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %b, zeroinitializer
- %load = call <32 x i16> @llvm.masked.load.v32i16(<32 x i16>* %ap, i32 8, <32 x i1> %mask, <32 x i16> undef)
+ %load = call <32 x i16> @llvm.masked.load.v32i16(ptr %ap, i32 8, <32 x i1> %mask, <32 x i16> undef)
%ext = zext <32 x i16> %load to <32 x i64>
ret <32 x i64> %ext
}
-define <32 x i64> @masked_load_zext_v32i32i64(<32 x i32>* %ap, <32 x i32>* %bp) vscale_range(16,0) #0 {
+define <32 x i64> @masked_load_zext_v32i32i64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v32i32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1w { z0.d }, p1/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: ret
- %b = load <32 x i32>, <32 x i32>* %bp
+ %b = load <32 x i32>, ptr %bp
%mask = icmp eq <32 x i32> %b, zeroinitializer
- %load = call <32 x i32> @llvm.masked.load.v32i32(<32 x i32>* %ap, i32 8, <32 x i1> %mask, <32 x i32> undef)
+ %load = call <32 x i32> @llvm.masked.load.v32i32(ptr %ap, i32 8, <32 x i1> %mask, <32 x i32> undef)
%ext = zext <32 x i32> %load to <32 x i64>
ret <32 x i64> %ext
}
-define <8 x i64> @masked_load_sext_ugt_v8i32i64(<8 x i32>* %ap, <8 x i32>* %bp) #0 {
+define <8 x i64> @masked_load_sext_ugt_v8i32i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_ugt_v8i32i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: ld1sw { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i32>, <8 x i32>* %bp
+ %b = load <8 x i32>, ptr %bp
%mask = icmp ugt <8 x i32> %b, zeroinitializer
- %load = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
+ %load = call <8 x i32> @llvm.masked.load.v8i32(ptr %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
%ext = sext <8 x i32> %load to <8 x i64>
ret <8 x i64> %ext
}
-define <8 x i64> @masked_load_zext_sgt_v8i32i64(<8 x i32>* %ap, <8 x i32>* %bp) #0 {
+define <8 x i64> @masked_load_zext_sgt_v8i32i64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_sgt_v8i32i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
- %b = load <8 x i32>, <8 x i32>* %bp
+ %b = load <8 x i32>, ptr %bp
%mask = icmp sgt <8 x i32> %b, zeroinitializer
- %load = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
+ %load = call <8 x i32> @llvm.masked.load.v8i32(ptr %ap, i32 8, <8 x i1> %mask, <8 x i32> undef)
%ext = zext <8 x i32> %load to <8 x i64>
ret <8 x i64> %ext
}
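; The ugt/sgt cases above check that the extending masked-load combine still
; fires when the mask comparison is an ordering predicate rather than equality.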
-declare <2 x half> @llvm.masked.load.v2f16(<2 x half>*, i32, <2 x i1>, <2 x half>)
-declare <2 x float> @llvm.masked.load.v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
-declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
-declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
-declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
-declare <32 x float> @llvm.masked.load.v32f32(<32 x float>*, i32, <32 x i1>, <32 x float>)
-declare <64 x float> @llvm.masked.load.v64f32(<64 x float>*, i32, <64 x i1>, <64 x float>)
+declare <2 x half> @llvm.masked.load.v2f16(ptr, i32, <2 x i1>, <2 x half>)
+declare <2 x float> @llvm.masked.load.v2f32(ptr, i32, <2 x i1>, <2 x float>)
+declare <4 x float> @llvm.masked.load.v4f32(ptr, i32, <4 x i1>, <4 x float>)
+declare <8 x float> @llvm.masked.load.v8f32(ptr, i32, <8 x i1>, <8 x float>)
+declare <16 x float> @llvm.masked.load.v16f32(ptr, i32, <16 x i1>, <16 x float>)
+declare <32 x float> @llvm.masked.load.v32f32(ptr, i32, <32 x i1>, <32 x float>)
+declare <64 x float> @llvm.masked.load.v64f32(ptr, i32, <64 x i1>, <64 x float>)
-declare <128 x i8> @llvm.masked.load.v128i8(<128 x i8>*, i32, <128 x i1>, <128 x i8>)
-declare <64 x i8> @llvm.masked.load.v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)
-declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)
-declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
-declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
-declare <8 x i8> @llvm.masked.load.v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
-declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
-declare <8 x i32> @llvm.masked.load.v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
-declare <32 x i32> @llvm.masked.load.v32i32(<32 x i32>*, i32, <32 x i1>, <32 x i32>)
-declare <32 x i16> @llvm.masked.load.v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)
-declare <64 x i16> @llvm.masked.load.v64i16(<64 x i16>*, i32, <64 x i1>, <64 x i16>)
-declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
-declare <8 x i64> @llvm.masked.load.v8i64(<8 x i64>*, i32, <8 x i1>, <8 x i64>)
-declare <8 x double> @llvm.masked.load.v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
+declare <128 x i8> @llvm.masked.load.v128i8(ptr, i32, <128 x i1>, <128 x i8>)
+declare <64 x i8> @llvm.masked.load.v64i8(ptr, i32, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.load.v32i8(ptr, i32, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8(ptr, i32, <16 x i1>, <16 x i8>)
+declare <16 x i16> @llvm.masked.load.v16i16(ptr, i32, <16 x i1>, <16 x i16>)
+declare <8 x i8> @llvm.masked.load.v8i8(ptr, i32, <8 x i1>, <8 x i8>)
+declare <8 x i16> @llvm.masked.load.v8i16(ptr, i32, <8 x i1>, <8 x i16>)
+declare <8 x i32> @llvm.masked.load.v8i32(ptr, i32, <8 x i1>, <8 x i32>)
+declare <32 x i32> @llvm.masked.load.v32i32(ptr, i32, <32 x i1>, <32 x i32>)
+declare <32 x i16> @llvm.masked.load.v32i16(ptr, i32, <32 x i1>, <32 x i16>)
+declare <64 x i16> @llvm.masked.load.v64i16(ptr, i32, <64 x i1>, <64 x i16>)
+declare <16 x i32> @llvm.masked.load.v16i32(ptr, i32, <16 x i1>, <16 x i32>)
+declare <8 x i64> @llvm.masked.load.v8i64(ptr, i32, <8 x i1>, <8 x i64>)
+declare <8 x double> @llvm.masked.load.v8f64(ptr, i32, <8 x i1>, <8 x double>)
attributes #0 = { "target-features"="+sve" }
; ST1B
;
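; With opaque pointers a scatter's address operand is a vector of plain ptr;
; a minimal sketch of the call (the names %v, %p and %m are illustrative):
;   call void @llvm.masked.scatter.v4i32(<4 x i32> %v, <4 x ptr> %p, i32 8, <4 x i1> %m)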
-define void @masked_scatter_v2i8(<2 x i8>* %a, <2 x i8*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: st1b { z0.d }, p0, [z2.d]
; CHECK-NEXT: ret
- %vals = load <2 x i8>, <2 x i8>* %a
- %ptrs = load <2 x i8*>, <2 x i8*>* %b
+ %vals = load <2 x i8>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = icmp eq <2 x i8> %vals, zeroinitializer
- call void @llvm.masked.scatter.v2i8(<2 x i8> %vals, <2 x i8*> %ptrs, i32 8, <2 x i1> %mask)
+ call void @llvm.masked.scatter.v2i8(<2 x i8> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
ret void
}
-define void @masked_scatter_v4i8(<4 x i8>* %a, <4 x i8*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v4i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <4 x i8>, <4 x i8>* %a
- %ptrs = load <4 x i8*>, <4 x i8*>* %b
+ %vals = load <4 x i8>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = icmp eq <4 x i8> %vals, zeroinitializer
- call void @llvm.masked.scatter.v4i8(<4 x i8> %vals, <4 x i8*> %ptrs, i32 8, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i8(<4 x i8> %vals, <4 x ptr> %ptrs, i32 8, <4 x i1> %mask)
ret void
}
-define void @masked_scatter_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
+define void @masked_scatter_v8i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_scatter_v8i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr d0, [x0]
; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0
; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [z1.d]
; VBITS_GE_512-NEXT: ret
- %vals = load <8 x i8>, <8 x i8>* %a
- %ptrs = load <8 x i8*>, <8 x i8*>* %b
+ %vals = load <8 x i8>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = icmp eq <8 x i8> %vals, zeroinitializer
- call void @llvm.masked.scatter.v8i8(<8 x i8> %vals, <8 x i8*> %ptrs, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i8(<8 x i8> %vals, <8 x ptr> %ptrs, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_scatter_v16i8(<16 x i8>* %a, <16 x i8*>* %b) vscale_range(8,0) #0 {
+define void @masked_scatter_v16i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <16 x i8>, <16 x i8>* %a
- %ptrs = load <16 x i8*>, <16 x i8*>* %b
+ %vals = load <16 x i8>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = icmp eq <16 x i8> %vals, zeroinitializer
- call void @llvm.masked.scatter.v16i8(<16 x i8> %vals, <16 x i8*> %ptrs, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.scatter.v16i8(<16 x i8> %vals, <16 x ptr> %ptrs, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_scatter_v32i8(<32 x i8>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 {
+define void @masked_scatter_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <32 x i8>, <32 x i8>* %a
- %ptrs = load <32 x i8*>, <32 x i8*>* %b
+ %vals = load <32 x i8>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = icmp eq <32 x i8> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32i8(<32 x i8> %vals, <32 x i8*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32i8(<32 x i8> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
; ST1H
;
-define void @masked_scatter_v2i16(<2 x i16>* %a, <2 x i16*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: st1h { z0.d }, p0, [z2.d]
; CHECK-NEXT: ret
- %vals = load <2 x i16>, <2 x i16>* %a
- %ptrs = load <2 x i16*>, <2 x i16*>* %b
+ %vals = load <2 x i16>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = icmp eq <2 x i16> %vals, zeroinitializer
- call void @llvm.masked.scatter.v2i16(<2 x i16> %vals, <2 x i16*> %ptrs, i32 8, <2 x i1> %mask)
+ call void @llvm.masked.scatter.v2i16(<2 x i16> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
ret void
}
-define void @masked_scatter_v4i16(<4 x i16>* %a, <4 x i16*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v4i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <4 x i16>, <4 x i16>* %a
- %ptrs = load <4 x i16*>, <4 x i16*>* %b
+ %vals = load <4 x i16>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = icmp eq <4 x i16> %vals, zeroinitializer
- call void @llvm.masked.scatter.v4i16(<4 x i16> %vals, <4 x i16*> %ptrs, i32 8, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i16(<4 x i16> %vals, <4 x ptr> %ptrs, i32 8, <4 x i1> %mask)
ret void
}
-define void @masked_scatter_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
+define void @masked_scatter_v8i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_scatter_v8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0
; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z1.d]
; VBITS_GE_512-NEXT: ret
- %vals = load <8 x i16>, <8 x i16>* %a
- %ptrs = load <8 x i16*>, <8 x i16*>* %b
+ %vals = load <8 x i16>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = icmp eq <8 x i16> %vals, zeroinitializer
- call void @llvm.masked.scatter.v8i16(<8 x i16> %vals, <8 x i16*> %ptrs, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i16(<8 x i16> %vals, <8 x ptr> %ptrs, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_scatter_v16i16(<16 x i16>* %a, <16 x i16*>* %b) vscale_range(8,0) #0 {
+define void @masked_scatter_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <16 x i16>, <16 x i16>* %a
- %ptrs = load <16 x i16*>, <16 x i16*>* %b
+ %vals = load <16 x i16>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = icmp eq <16 x i16> %vals, zeroinitializer
- call void @llvm.masked.scatter.v16i16(<16 x i16> %vals, <16 x i16*> %ptrs, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.scatter.v16i16(<16 x i16> %vals, <16 x ptr> %ptrs, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_scatter_v32i16(<32 x i16>* %a, <32 x i16*>* %b) vscale_range(16,0) #0 {
+define void @masked_scatter_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <32 x i16>, <32 x i16>* %a
- %ptrs = load <32 x i16*>, <32 x i16*>* %b
+ %vals = load <32 x i16>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = icmp eq <32 x i16> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32i16(<32 x i16> %vals, <32 x i16*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32i16(<32 x i16> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
; ST1W
;
-define void @masked_scatter_v2i32(<2 x i32>* %a, <2 x i32*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: st1w { z0.d }, p0, [z2.d]
; CHECK-NEXT: ret
- %vals = load <2 x i32>, <2 x i32>* %a
- %ptrs = load <2 x i32*>, <2 x i32*>* %b
+ %vals = load <2 x i32>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = icmp eq <2 x i32> %vals, zeroinitializer
- call void @llvm.masked.scatter.v2i32(<2 x i32> %vals, <2 x i32*> %ptrs, i32 8, <2 x i1> %mask)
+ call void @llvm.masked.scatter.v2i32(<2 x i32> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
ret void
}
-define void @masked_scatter_v4i32(<4 x i32>* %a, <4 x i32*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v4i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <4 x i32>, <4 x i32>* %a
- %ptrs = load <4 x i32*>, <4 x i32*>* %b
+ %vals = load <4 x i32>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = icmp eq <4 x i32> %vals, zeroinitializer
- call void @llvm.masked.scatter.v4i32(<4 x i32> %vals, <4 x i32*> %ptrs, i32 8, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i32(<4 x i32> %vals, <4 x ptr> %ptrs, i32 8, <4 x i1> %mask)
ret void
}
-define void @masked_scatter_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 {
+define void @masked_scatter_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_scatter_v8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: punpklo p0.h, p0.b
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [z1.d]
; VBITS_GE_512-NEXT: ret
- %vals = load <8 x i32>, <8 x i32>* %a
- %ptrs = load <8 x i32*>, <8 x i32*>* %b
+ %vals = load <8 x i32>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = icmp eq <8 x i32> %vals, zeroinitializer
- call void @llvm.masked.scatter.v8i32(<8 x i32> %vals, <8 x i32*> %ptrs, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i32(<8 x i32> %vals, <8 x ptr> %ptrs, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_scatter_v16i32(<16 x i32>* %a, <16 x i32*>* %b) vscale_range(8,0) #0 {
+define void @masked_scatter_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <16 x i32>, <16 x i32>* %a
- %ptrs = load <16 x i32*>, <16 x i32*>* %b
+ %vals = load <16 x i32>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = icmp eq <16 x i32> %vals, zeroinitializer
- call void @llvm.masked.scatter.v16i32(<16 x i32> %vals, <16 x i32*> %ptrs, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.scatter.v16i32(<16 x i32> %vals, <16 x ptr> %ptrs, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_scatter_v32i32(<32 x i32>* %a, <32 x i32*>* %b) vscale_range(16,0) #0 {
+define void @masked_scatter_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <32 x i32>, <32 x i32>* %a
- %ptrs = load <32 x i32*>, <32 x i32*>* %b
+ %vals = load <32 x i32>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = icmp eq <32 x i32> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32i32(<32 x i32> %vals, <32 x i32*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32i32(<32 x i32> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
; Scalarize 1 x i64 scatters
;
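; A <1 x i64> scatter has no useful vector form, so it is lowered to a scalar
; compare-and-branch around a plain store; the .LBB15_2 "else" block in the
; checks below reflects that branch.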
-define void @masked_scatter_v1i64(<1 x i64>* %a, <1 x i64*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v1i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x8]
; CHECK-NEXT: .LBB15_2: // %else
; CHECK-NEXT: ret
- %vals = load <1 x i64>, <1 x i64>* %a
- %ptrs = load <1 x i64*>, <1 x i64*>* %b
+ %vals = load <1 x i64>, ptr %a
+ %ptrs = load <1 x ptr>, ptr %b
%mask = icmp eq <1 x i64> %vals, zeroinitializer
- call void @llvm.masked.scatter.v1i64(<1 x i64> %vals, <1 x i64*> %ptrs, i32 8, <1 x i1> %mask)
+ call void @llvm.masked.scatter.v1i64(<1 x i64> %vals, <1 x ptr> %ptrs, i32 8, <1 x i1> %mask)
ret void
}
-define void @masked_scatter_v2i64(<2 x i64>* %a, <2 x i64*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <2 x i64>, <2 x i64>* %a
- %ptrs = load <2 x i64*>, <2 x i64*>* %b
+ %vals = load <2 x i64>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = icmp eq <2 x i64> %vals, zeroinitializer
- call void @llvm.masked.scatter.v2i64(<2 x i64> %vals, <2 x i64*> %ptrs, i32 8, <2 x i1> %mask)
+ call void @llvm.masked.scatter.v2i64(<2 x i64> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
ret void
}
-define void @masked_scatter_v4i64(<4 x i64>* %a, <4 x i64*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <4 x i64>, <4 x i64>* %a
- %ptrs = load <4 x i64*>, <4 x i64*>* %b
+ %vals = load <4 x i64>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = icmp eq <4 x i64> %vals, zeroinitializer
- call void @llvm.masked.scatter.v4i64(<4 x i64> %vals, <4 x i64*> %ptrs, i32 8, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i64(<4 x i64> %vals, <4 x ptr> %ptrs, i32 8, <4 x i1> %mask)
ret void
}
-define void @masked_scatter_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 {
+define void @masked_scatter_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_scatter_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [z1.d]
; VBITS_GE_512-NEXT: ret
- %vals = load <8 x i64>, <8 x i64>* %a
- %ptrs = load <8 x i64*>, <8 x i64*>* %b
+ %vals = load <8 x i64>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = icmp eq <8 x i64> %vals, zeroinitializer
- call void @llvm.masked.scatter.v8i64(<8 x i64> %vals, <8 x i64*> %ptrs, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i64(<8 x i64> %vals, <8 x ptr> %ptrs, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_scatter_v16i64(<16 x i64>* %a, <16 x i64*>* %b) vscale_range(8,0) #0 {
+define void @masked_scatter_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <16 x i64>, <16 x i64>* %a
- %ptrs = load <16 x i64*>, <16 x i64*>* %b
+ %vals = load <16 x i64>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = icmp eq <16 x i64> %vals, zeroinitializer
- call void @llvm.masked.scatter.v16i64(<16 x i64> %vals, <16 x i64*> %ptrs, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.scatter.v16i64(<16 x i64> %vals, <16 x ptr> %ptrs, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_scatter_v32i64(<32 x i64>* %a, <32 x i64*>* %b) vscale_range(16,0) #0 {
+define void @masked_scatter_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <32 x i64>, <32 x i64>* %a
- %ptrs = load <32 x i64*>, <32 x i64*>* %b
+ %vals = load <32 x i64>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = icmp eq <32 x i64> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32i64(<32 x i64> %vals, <32 x i64*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32i64(<32 x i64> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
; ST1H (float)
;
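; The floating-point scatters below reuse the integer lowering: the element
; size alone selects st1h/st1w, with the half/float lanes stored as raw bits.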
-define void @masked_scatter_v2f16(<2 x half>* %a, <2 x half*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: uunpklo z0.d, z1.s
; CHECK-NEXT: st1h { z0.d }, p0, [z2.d]
; CHECK-NEXT: ret
- %vals = load <2 x half>, <2 x half>* %a
- %ptrs = load <2 x half*>, <2 x half*>* %b
+ %vals = load <2 x half>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = fcmp oeq <2 x half> %vals, zeroinitializer
- call void @llvm.masked.scatter.v2f16(<2 x half> %vals, <2 x half*> %ptrs, i32 8, <2 x i1> %mask)
+ call void @llvm.masked.scatter.v2f16(<2 x half> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
ret void
}
-define void @masked_scatter_v4f16(<4 x half>* %a, <4 x half*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v4f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <4 x half>, <4 x half>* %a
- %ptrs = load <4 x half*>, <4 x half*>* %b
+ %vals = load <4 x half>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = fcmp oeq <4 x half> %vals, zeroinitializer
- call void @llvm.masked.scatter.v4f16(<4 x half> %vals, <4 x half*> %ptrs, i32 8, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4f16(<4 x half> %vals, <4 x ptr> %ptrs, i32 8, <4 x i1> %mask)
ret void
}
-define void @masked_scatter_v8f16(<8 x half>* %a, <8 x half*>* %b) #0 {
+define void @masked_scatter_v8f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_scatter_v8f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0
; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z1.d]
; VBITS_GE_512-NEXT: ret
- %vals = load <8 x half>, <8 x half>* %a
- %ptrs = load <8 x half*>, <8 x half*>* %b
+ %vals = load <8 x half>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = fcmp oeq <8 x half> %vals, zeroinitializer
- call void @llvm.masked.scatter.v8f16(<8 x half> %vals, <8 x half*> %ptrs, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8f16(<8 x half> %vals, <8 x ptr> %ptrs, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_scatter_v16f16(<16 x half>* %a, <16 x half*>* %b) vscale_range(8,0) #0 {
+define void @masked_scatter_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <16 x half>, <16 x half>* %a
- %ptrs = load <16 x half*>, <16 x half*>* %b
+ %vals = load <16 x half>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = fcmp oeq <16 x half> %vals, zeroinitializer
- call void @llvm.masked.scatter.v16f16(<16 x half> %vals, <16 x half*> %ptrs, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.scatter.v16f16(<16 x half> %vals, <16 x ptr> %ptrs, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_scatter_v32f16(<32 x half>* %a, <32 x half*>* %b) vscale_range(16,0) #0 {
+define void @masked_scatter_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <32 x half>, <32 x half>* %a
- %ptrs = load <32 x half*>, <32 x half*>* %b
+ %vals = load <32 x half>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = fcmp oeq <32 x half> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
;
; ST1W (float)
;
-define void @masked_scatter_v2f32(<2 x float>* %a, <2 x float*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v2f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: st1w { z0.d }, p0, [z2.d]
; CHECK-NEXT: ret
- %vals = load <2 x float>, <2 x float>* %a
- %ptrs = load <2 x float*>, <2 x float*>* %b
+ %vals = load <2 x float>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = fcmp oeq <2 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v2f32(<2 x float> %vals, <2 x float*> %ptrs, i32 8, <2 x i1> %mask)
+ call void @llvm.masked.scatter.v2f32(<2 x float> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
ret void
}
-define void @masked_scatter_v4f32(<4 x float>* %a, <4 x float*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v4f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <4 x float>, <4 x float>* %a
- %ptrs = load <4 x float*>, <4 x float*>* %b
+ %vals = load <4 x float>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = fcmp oeq <4 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v4f32(<4 x float> %vals, <4 x float*> %ptrs, i32 8, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4f32(<4 x float> %vals, <4 x ptr> %ptrs, i32 8, <4 x i1> %mask)
ret void
}
-define void @masked_scatter_v8f32(<8 x float>* %a, <8 x float*>* %b) #0 {
+define void @masked_scatter_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_scatter_v8f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: punpklo p0.h, p0.b
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [z1.d]
; VBITS_GE_512-NEXT: ret
- %vals = load <8 x float>, <8 x float>* %a
- %ptrs = load <8 x float*>, <8 x float*>* %b
+ %vals = load <8 x float>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = fcmp oeq <8 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v8f32(<8 x float> %vals, <8 x float*> %ptrs, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8f32(<8 x float> %vals, <8 x ptr> %ptrs, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_scatter_v16f32(<16 x float>* %a, <16 x float*>* %b) vscale_range(8,0) #0 {
+define void @masked_scatter_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <16 x float>, <16 x float>* %a
- %ptrs = load <16 x float*>, <16 x float*>* %b
+ %vals = load <16 x float>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = fcmp oeq <16 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v16f32(<16 x float> %vals, <16 x float*> %ptrs, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.scatter.v16f32(<16 x float> %vals, <16 x ptr> %ptrs, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_scatter_v32f32(<32 x float>* %a, <32 x float*>* %b) vscale_range(16,0) #0 {
+define void @masked_scatter_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <32 x float>, <32 x float>* %a
- %ptrs = load <32 x float*>, <32 x float*>* %b
+ %vals = load <32 x float>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = fcmp oeq <32 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
;
; ST1D (float)
;
; Scalarize 1 x double scatters
-define void @masked_scatter_v1f64(<1 x double>* %a, <1 x double*>* %b) vscale_range(8,0) #0 {
+define void @masked_scatter_v1f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x8]
; CHECK-NEXT: .LBB31_2: // %else
; CHECK-NEXT: ret
- %vals = load <1 x double>, <1 x double>* %a
- %ptrs = load <1 x double*>, <1 x double*>* %b
+ %vals = load <1 x double>, ptr %a
+ %ptrs = load <1 x ptr>, ptr %b
%mask = fcmp oeq <1 x double> %vals, zeroinitializer
- call void @llvm.masked.scatter.v1f64(<1 x double> %vals, <1 x double*> %ptrs, i32 8, <1 x i1> %mask)
+ call void @llvm.masked.scatter.v1f64(<1 x double> %vals, <1 x ptr> %ptrs, i32 8, <1 x i1> %mask)
ret void
}
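; A minimal sketch (hypothetical function, not part of the original tests)
; of the scalarized form a <1 x double> scatter lowers to: the single
; lane's mask bit guards an ordinary scalar store via a branch, matching
; the ".LBB31_2" else block seen in the checks above.
define void @masked_scatter_v1f64_scalarized_sketch(double %val, ptr %p, i1 %m) {
  br i1 %m, label %active, label %exit
active:
  ; lane 0 is active: perform a plain scalar store
  store double %val, ptr %p, align 8
  br label %exit
exit:
  ret void
}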
-define void @masked_scatter_v2f64(<2 x double>* %a, <2 x double*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: st1d { z0.d }, p0, [z2.d]
; CHECK-NEXT: ret
- %vals = load <2 x double>, <2 x double>* %a
- %ptrs = load <2 x double*>, <2 x double*>* %b
+ %vals = load <2 x double>, ptr %a
+ %ptrs = load <2 x ptr>, ptr %b
%mask = fcmp oeq <2 x double> %vals, zeroinitializer
- call void @llvm.masked.scatter.v2f64(<2 x double> %vals, <2 x double*> %ptrs, i32 8, <2 x i1> %mask)
+ call void @llvm.masked.scatter.v2f64(<2 x double> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
ret void
}
-define void @masked_scatter_v4f64(<4 x double>* %a, <4 x double*>* %b) vscale_range(2,0) #0 {
+define void @masked_scatter_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_scatter_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <4 x double>, <4 x double>* %a
- %ptrs = load <4 x double*>, <4 x double*>* %b
+ %vals = load <4 x double>, ptr %a
+ %ptrs = load <4 x ptr>, ptr %b
%mask = fcmp oeq <4 x double> %vals, zeroinitializer
- call void @llvm.masked.scatter.v4f64(<4 x double> %vals, <4 x double*> %ptrs, i32 8, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4f64(<4 x double> %vals, <4 x ptr> %ptrs, i32 8, <4 x i1> %mask)
ret void
}
-define void @masked_scatter_v8f64(<8 x double>* %a, <8 x double*>* %b) #0 {
+define void @masked_scatter_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_scatter_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [z1.d]
; VBITS_GE_512-NEXT: ret
- %vals = load <8 x double>, <8 x double>* %a
- %ptrs = load <8 x double*>, <8 x double*>* %b
+ %vals = load <8 x double>, ptr %a
+ %ptrs = load <8 x ptr>, ptr %b
%mask = fcmp oeq <8 x double> %vals, zeroinitializer
- call void @llvm.masked.scatter.v8f64(<8 x double> %vals, <8 x double*> %ptrs, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8f64(<8 x double> %vals, <8 x ptr> %ptrs, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_scatter_v16f64(<16 x double>* %a, <16 x double*>* %b) vscale_range(8,0) #0 {
+define void @masked_scatter_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <16 x double>, <16 x double>* %a
- %ptrs = load <16 x double*>, <16 x double*>* %b
+ %vals = load <16 x double>, ptr %a
+ %ptrs = load <16 x ptr>, ptr %b
%mask = fcmp oeq <16 x double> %vals, zeroinitializer
- call void @llvm.masked.scatter.v16f64(<16 x double> %vals, <16 x double*> %ptrs, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.scatter.v16f64(<16 x double> %vals, <16 x ptr> %ptrs, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_scatter_v32f64(<32 x double>* %a, <32 x double*>* %b) vscale_range(16,0) #0 {
+define void @masked_scatter_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
- %vals = load <32 x double>, <32 x double>* %a
- %ptrs = load <32 x double*>, <32 x double*>* %b
+ %vals = load <32 x double>, ptr %a
+ %ptrs = load <32 x ptr>, ptr %b
%mask = fcmp oeq <32 x double> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f64(<32 x double> %vals, <32 x double*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f64(<32 x double> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
; The tests above cover the supported element types; the tests below check
; that the addressing modes still function.
-define void @masked_scatter_32b_scaled_sext_f16(<32 x half>* %a, <32 x i32>* %b, half* %base) vscale_range(16,0) #0 {
+define void @masked_scatter_32b_scaled_sext_f16(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_32b_scaled_sext_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1h { z0.s }, p0, [x2, z1.s, sxtw #1]
; CHECK-NEXT: ret
- %vals = load <32 x half>, <32 x half>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %vals = load <32 x half>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = sext <32 x i32> %idxs to <32 x i64>
- %ptrs = getelementptr half, half* %base, <32 x i64> %ext
+ %ptrs = getelementptr half, ptr %base, <32 x i64> %ext
%mask = fcmp oeq <32 x half> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
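; In the scaled addressing forms, the extend/shift operand (e.g. "sxtw #1")
; folds both the index extension and the GEP's element-size multiply into
; the store itself, so no separate address arithmetic is emitted.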
-define void @masked_scatter_32b_scaled_sext_f32(<32 x float>* %a, <32 x i32>* %b, float* %base) vscale_range(16,0) #0 {
+define void @masked_scatter_32b_scaled_sext_f32(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_32b_scaled_sext_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: st1w { z0.s }, p0, [x2, z1.s, sxtw #2]
; CHECK-NEXT: ret
- %vals = load <32 x float>, <32 x float>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %vals = load <32 x float>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = sext <32 x i32> %idxs to <32 x i64>
- %ptrs = getelementptr float, float* %base, <32 x i64> %ext
+ %ptrs = getelementptr float, ptr %base, <32 x i64> %ext
%mask = fcmp oeq <32 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
-define void @masked_scatter_32b_scaled_sext_f64(<32 x double>* %a, <32 x i32>* %b, double* %base) vscale_range(16,0) #0 {
+define void @masked_scatter_32b_scaled_sext_f64(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_32b_scaled_sext_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
; CHECK-NEXT: st1d { z0.d }, p0, [x2, z1.d, lsl #3]
; CHECK-NEXT: ret
- %vals = load <32 x double>, <32 x double>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %vals = load <32 x double>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = sext <32 x i32> %idxs to <32 x i64>
- %ptrs = getelementptr double, double* %base, <32 x i64> %ext
+ %ptrs = getelementptr double, ptr %base, <32 x i64> %ext
%mask = fcmp oeq <32 x double> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f64(<32 x double> %vals, <32 x double*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f64(<32 x double> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
-define void @masked_scatter_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, half* %base) vscale_range(16,0) #0 {
+define void @masked_scatter_32b_scaled_zext(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_32b_scaled_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1h { z0.s }, p0, [x2, z1.s, uxtw #1]
; CHECK-NEXT: ret
- %vals = load <32 x half>, <32 x half>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %vals = load <32 x half>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = zext <32 x i32> %idxs to <32 x i64>
- %ptrs = getelementptr half, half* %base, <32 x i64> %ext
+ %ptrs = getelementptr half, ptr %base, <32 x i64> %ext
%mask = fcmp oeq <32 x half> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
-define void @masked_scatter_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i8* %base) vscale_range(16,0) #0 {
+define void @masked_scatter_32b_unscaled_sext(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_32b_unscaled_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1h { z0.s }, p0, [x2, z1.s, sxtw]
; CHECK-NEXT: ret
- %vals = load <32 x half>, <32 x half>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %vals = load <32 x half>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = sext <32 x i32> %idxs to <32 x i64>
- %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %ext
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x half*>
+ %byte_ptrs = getelementptr i8, ptr %base, <32 x i64> %ext
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x half> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
-define void @masked_scatter_32b_unscaled_zext(<32 x half>* %a, <32 x i32>* %b, i8* %base) vscale_range(16,0) #0 {
+define void @masked_scatter_32b_unscaled_zext(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_32b_unscaled_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1h { z0.s }, p0, [x2, z1.s, uxtw]
; CHECK-NEXT: ret
- %vals = load <32 x half>, <32 x half>* %a
- %idxs = load <32 x i32>, <32 x i32>* %b
+ %vals = load <32 x half>, ptr %a
+ %idxs = load <32 x i32>, ptr %b
%ext = zext <32 x i32> %idxs to <32 x i64>
- %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %ext
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x half*>
+ %byte_ptrs = getelementptr i8, ptr %base, <32 x i64> %ext
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x half> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
-define void @masked_scatter_64b_scaled(<32 x float>* %a, <32 x i64>* %b, float* %base) vscale_range(16,0) #0 {
+define void @masked_scatter_64b_scaled(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_64b_scaled:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1w { z0.d }, p0, [x2, z1.d, lsl #2]
; CHECK-NEXT: ret
- %vals = load <32 x float>, <32 x float>* %a
- %idxs = load <32 x i64>, <32 x i64>* %b
- %ptrs = getelementptr float, float* %base, <32 x i64> %idxs
+ %vals = load <32 x float>, ptr %a
+ %idxs = load <32 x i64>, ptr %b
+ %ptrs = getelementptr float, ptr %base, <32 x i64> %idxs
%mask = fcmp oeq <32 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
-define void @masked_scatter_64b_unscaled(<32 x float>* %a, <32 x i64>* %b, i8* %base) vscale_range(16,0) #0 {
+define void @masked_scatter_64b_unscaled(ptr %a, ptr %b, ptr %base) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_64b_unscaled:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1w { z0.d }, p0, [x2, z1.d]
; CHECK-NEXT: ret
- %vals = load <32 x float>, <32 x float>* %a
- %idxs = load <32 x i64>, <32 x i64>* %b
- %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %idxs
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*>
+ %vals = load <32 x float>, ptr %a
+ %idxs = load <32 x i64>, ptr %b
+ %byte_ptrs = getelementptr i8, ptr %base, <32 x i64> %idxs
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
-define void @masked_scatter_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %off) vscale_range(16,0) #0 {
+define void @masked_scatter_vec_plus_reg(ptr %a, ptr %b, i64 %off) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_vec_plus_reg:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1w { z0.d }, p0, [x2, z1.d]
; CHECK-NEXT: ret
- %vals = load <32 x float>, <32 x float>* %a
- %bases = load <32 x i8*>, <32 x i8*>* %b
- %byte_ptrs = getelementptr i8, <32 x i8*> %bases, i64 %off
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*>
+ %vals = load <32 x float>, ptr %a
+ %bases = load <32 x ptr>, ptr %b
+ %byte_ptrs = getelementptr i8, <32 x ptr> %bases, i64 %off
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
-define void @masked_scatter_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 {
+define void @masked_scatter_vec_plus_imm(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_vec_plus_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d, #4]
; CHECK-NEXT: ret
- %vals = load <32 x float>, <32 x float>* %a
- %bases = load <32 x i8*>, <32 x i8*>* %b
- %byte_ptrs = getelementptr i8, <32 x i8*> %bases, i64 4
- %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*>
+ %vals = load <32 x float>, ptr %a
+ %bases = load <32 x ptr>, ptr %b
+ %byte_ptrs = getelementptr i8, <32 x ptr> %bases, i64 4
+ %ptrs = bitcast <32 x ptr> %byte_ptrs to <32 x ptr>
%mask = fcmp oeq <32 x float> %vals, zeroinitializer
- call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x ptr> %ptrs, i32 8, <32 x i1> %mask)
ret void
}
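; The [z1.d, #4] form above encodes the constant byte offset directly in
; the vector-plus-immediate addressing mode, avoiding any scalar add.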
; NOTE: For this test to function correctly it's critical for %vals to be in a
; different block to the scatter store. If not, the problematic bitcast will be
; removed before operation legalisation and thus not exercise the combine.
-define void @masked_scatter_bitcast_infinite_loop(<8 x double>* %a, <8 x double*>* %b, i1 %cond) vscale_range(4,0) #0 {
+define void @masked_scatter_bitcast_infinite_loop(ptr %a, ptr %b, i1 %cond) vscale_range(4,0) #0 {
; CHECK-LABEL: masked_scatter_bitcast_infinite_loop:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: .LBB47_2: // %bb.2
; CHECK-NEXT: ret
- %vals = load volatile <8 x double>, <8 x double>* %a
+ %vals = load volatile <8 x double>, ptr %a
br i1 %cond, label %bb.1, label %bb.2
bb.1:
- %ptrs = load <8 x double*>, <8 x double*>* %b
+ %ptrs = load <8 x ptr>, ptr %b
%mask = fcmp oeq <8 x double> %vals, zeroinitializer
- call void @llvm.masked.scatter.v8f64(<8 x double> %vals, <8 x double*> %ptrs, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8f64(<8 x double> %vals, <8 x ptr> %ptrs, i32 8, <8 x i1> %mask)
br label %bb.2
bb.2:
ret void
}
-declare void @llvm.masked.scatter.v2i8(<2 x i8>, <2 x i8*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v4i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v8i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v16i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v32i8(<32 x i8>, <32 x i8*>, i32, <32 x i1>)
-
-declare void @llvm.masked.scatter.v2i16(<2 x i16>, <2 x i16*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v4i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v8i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v16i16(<16 x i16>, <16 x i16*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v32i16(<32 x i16>, <32 x i16*>, i32, <32 x i1>)
-
-declare void @llvm.masked.scatter.v2i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v4i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v8i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v16i32(<16 x i32>, <16 x i32*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v32i32(<32 x i32>, <32 x i32*>, i32, <32 x i1>)
-
-declare void @llvm.masked.scatter.v1i64(<1 x i64>, <1 x i64*>, i32, <1 x i1>)
-declare void @llvm.masked.scatter.v2i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v4i64(<4 x i64>, <4 x i64*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v8i64(<8 x i64>, <8 x i64*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v16i64(<16 x i64>, <16 x i64*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v32i64(<32 x i64>, <32 x i64*>, i32, <32 x i1>)
-
-declare void @llvm.masked.scatter.v2f16(<2 x half>, <2 x half*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v4f16(<4 x half>, <4 x half*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v8f16(<8 x half>, <8 x half*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v16f16(<16 x half>, <16 x half*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v32f16(<32 x half>, <32 x half*>, i32, <32 x i1>)
-
-declare void @llvm.masked.scatter.v2f32(<2 x float>, <2 x float*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v4f32(<4 x float>, <4 x float*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v8f32(<8 x float>, <8 x float*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v16f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v32f32(<32 x float>, <32 x float*>, i32, <32 x i1>)
-
-declare void @llvm.masked.scatter.v1f64(<1 x double>, <1 x double*>, i32, <1 x i1>)
-declare void @llvm.masked.scatter.v2f64(<2 x double>, <2 x double*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v4f64(<4 x double>, <4 x double*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v8f64(<8 x double>, <8 x double*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v16f64(<16 x double>, <16 x double*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v32f64(<32 x double>, <32 x double*>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v2i8(<2 x i8>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v4i8(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v8i8(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v16i8(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v32i8(<32 x i8>, <32 x ptr>, i32, <32 x i1>)
+
+declare void @llvm.masked.scatter.v2i16(<2 x i16>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v4i16(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v8i16(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v16i16(<16 x i16>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v32i16(<32 x i16>, <32 x ptr>, i32, <32 x i1>)
+
+declare void @llvm.masked.scatter.v2i32(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v4i32(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v8i32(<8 x i32>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v16i32(<16 x i32>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v32i32(<32 x i32>, <32 x ptr>, i32, <32 x i1>)
+
+declare void @llvm.masked.scatter.v1i64(<1 x i64>, <1 x ptr>, i32, <1 x i1>)
+declare void @llvm.masked.scatter.v2i64(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v4i64(<4 x i64>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v8i64(<8 x i64>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v16i64(<16 x i64>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v32i64(<32 x i64>, <32 x ptr>, i32, <32 x i1>)
+
+declare void @llvm.masked.scatter.v2f16(<2 x half>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v4f16(<4 x half>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v8f16(<8 x half>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v16f16(<16 x half>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v32f16(<32 x half>, <32 x ptr>, i32, <32 x i1>)
+
+declare void @llvm.masked.scatter.v2f32(<2 x float>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v4f32(<4 x float>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v8f32(<8 x float>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v16f32(<16 x float>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v32f32(<32 x float>, <32 x ptr>, i32, <32 x i1>)
+
+declare void @llvm.masked.scatter.v1f64(<1 x double>, <1 x ptr>, i32, <1 x i1>)
+declare void @llvm.masked.scatter.v2f64(<2 x double>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v4f64(<4 x double>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v8f64(<8 x double>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v16f64(<16 x double>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v32f64(<32 x double>, <32 x ptr>, i32, <32 x i1>)
attributes #0 = { "target-features"="+sve" }
;
; Masked Stores
;
-define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_range(2,0) #0 {
+define void @masked_store_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_store_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <2 x half>, <2 x half>* %ap
- %b = load <2 x half>, <2 x half>* %bp
+ %a = load <2 x half>, ptr %ap
+ %b = load <2 x half>, ptr %bp
%mask = fcmp oeq <2 x half> %a, %b
- call void @llvm.masked.store.v2f16(<2 x half> %a, <2 x half>* %bp, i32 8, <2 x i1> %mask)
+ call void @llvm.masked.store.v2f16(<2 x half> %a, ptr %bp, i32 8, <2 x i1> %mask)
ret void
}
-define void @masked_store_v2f32(<2 x float>* %ap, <2 x float>* %bp) vscale_range(2,0) #0 {
+define void @masked_store_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_store_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <2 x float>, <2 x float>* %ap
- %b = load <2 x float>, <2 x float>* %bp
+ %a = load <2 x float>, ptr %ap
+ %b = load <2 x float>, ptr %bp
%mask = fcmp oeq <2 x float> %a, %b
- call void @llvm.masked.store.v2f32(<2 x float> %a, <2 x float>* %bp, i32 8, <2 x i1> %mask)
+ call void @llvm.masked.store.v2f32(<2 x float> %a, ptr %bp, i32 8, <2 x i1> %mask)
ret void
}
-define void @masked_store_v4f32(<4 x float>* %ap, <4 x float>* %bp) vscale_range(2,0) #0 {
+define void @masked_store_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_store_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <4 x float>, <4 x float>* %ap
- %b = load <4 x float>, <4 x float>* %bp
+ %a = load <4 x float>, ptr %ap
+ %b = load <4 x float>, ptr %bp
%mask = fcmp oeq <4 x float> %a, %b
- call void @llvm.masked.store.v4f32(<4 x float> %a, <4 x float>* %bp, i32 8, <4 x i1> %mask)
+ call void @llvm.masked.store.v4f32(<4 x float> %a, ptr %bp, i32 8, <4 x i1> %mask)
ret void
}
-define void @masked_store_v8f32(<8 x float>* %ap, <8 x float>* %bp) vscale_range(2,0) #0 {
+define void @masked_store_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_store_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <8 x float>, <8 x float>* %ap
- %b = load <8 x float>, <8 x float>* %bp
+ %a = load <8 x float>, ptr %ap
+ %b = load <8 x float>, ptr %bp
%mask = fcmp oeq <8 x float> %a, %b
- call void @llvm.masked.store.v8f32(<8 x float> %a, <8 x float>* %bp, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.store.v8f32(<8 x float> %a, ptr %bp, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_store_v16f32(<16 x float>* %ap, <16 x float>* %bp) #0 {
+define void @masked_store_v16f32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_store_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x float>, <16 x float>* %ap
- %b = load <16 x float>, <16 x float>* %bp
+ %a = load <16 x float>, ptr %ap
+ %b = load <16 x float>, ptr %bp
%mask = fcmp oeq <16 x float> %a, %b
- call void @llvm.masked.store.v16f32(<16 x float> %a, <16 x float>* %ap, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.store.v16f32(<16 x float> %a, ptr %ap, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_store_v32f32(<32 x float>* %ap, <32 x float>* %bp) vscale_range(8,0) #0 {
+define void @masked_store_v32f32(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_store_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %a = load <32 x float>, <32 x float>* %ap
- %b = load <32 x float>, <32 x float>* %bp
+ %a = load <32 x float>, ptr %ap
+ %b = load <32 x float>, ptr %bp
%mask = fcmp oeq <32 x float> %a, %b
- call void @llvm.masked.store.v32f32(<32 x float> %a, <32 x float>* %ap, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.store.v32f32(<32 x float> %a, ptr %ap, i32 8, <32 x i1> %mask)
ret void
}
-define void @masked_store_v64f32(<64 x float>* %ap, <64 x float>* %bp) vscale_range(16,0) #0 {
+define void @masked_store_v64f32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_store_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %a = load <64 x float>, <64 x float>* %ap
- %b = load <64 x float>, <64 x float>* %bp
+ %a = load <64 x float>, ptr %ap
+ %b = load <64 x float>, ptr %bp
%mask = fcmp oeq <64 x float> %a, %b
- call void @llvm.masked.store.v64f32(<64 x float> %a, <64 x float>* %ap, i32 8, <64 x i1> %mask)
+ call void @llvm.masked.store.v64f32(<64 x float> %a, ptr %ap, i32 8, <64 x i1> %mask)
ret void
}
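; The tests below check that a trunc feeding a masked store folds into a
; single narrowing ST1B/ST1H/ST1W. A minimal sketch of the pattern
; (hypothetical function, not part of the original tests; it reuses the
; v8i8 declaration further down in this file):
define void @masked_store_trunc_sketch(<8 x i64> %v, ptr %dest, <8 x i1> %m) #0 {
  ; the truncation is expected to be absorbed by a narrowing st1b
  %val = trunc <8 x i64> %v to <8 x i8>
  call void @llvm.masked.store.v8i8(<8 x i8> %val, ptr %dest, i32 8, <8 x i1> %m)
  ret void
}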
-define void @masked_store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i8>* %dest) #0 {
+define void @masked_store_trunc_v8i64i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v8i64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %ap
- %b = load <8 x i64>, <8 x i64>* %bp
+ %a = load <8 x i64>, ptr %ap
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
%val = trunc <8 x i64> %a to <8 x i8>
- call void @llvm.masked.store.v8i8(<8 x i8> %val, <8 x i8>* %dest, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.store.v8i8(<8 x i8> %val, ptr %dest, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i16>* %dest) #0 {
+define void @masked_store_trunc_v8i64i16(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v8i64i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %ap
- %b = load <8 x i64>, <8 x i64>* %bp
+ %a = load <8 x i64>, ptr %ap
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
%val = trunc <8 x i64> %a to <8 x i16>
- call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %dest, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.store.v8i16(<8 x i16> %val, ptr %dest, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i32>* %dest) #0 {
+define void @masked_store_trunc_v8i64i32(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v8i64i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %ap
- %b = load <8 x i64>, <8 x i64>* %bp
+ %a = load <8 x i64>, ptr %ap
+ %b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
%val = trunc <8 x i64> %a to <8 x i32>
- call void @llvm.masked.store.v8i32(<8 x i32> %val, <8 x i32>* %dest, i32 8, <8 x i1> %mask)
+ call void @llvm.masked.store.v8i32(<8 x i32> %val, ptr %dest, i32 8, <8 x i1> %mask)
ret void
}
-define void @masked_store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i32>* %bp, <16 x i8>* %dest) #0 {
+define void @masked_store_trunc_v16i32i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v16i32i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: st1b { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %ap
- %b = load <16 x i32>, <16 x i32>* %bp
+ %a = load <16 x i32>, ptr %ap
+ %b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %a, %b
%val = trunc <16 x i32> %a to <16 x i8>
- call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %dest, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.store.v16i8(<16 x i8> %val, ptr %dest, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i32>* %bp, <16 x i16>* %dest) #0 {
+define void @masked_store_trunc_v16i32i16(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v16i32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %ap
- %b = load <16 x i32>, <16 x i32>* %bp
+ %a = load <16 x i32>, ptr %ap
+ %b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %a, %b
%val = trunc <16 x i32> %a to <16 x i16>
- call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %dest, i32 8, <16 x i1> %mask)
+ call void @llvm.masked.store.v16i16(<16 x i16> %val, ptr %dest, i32 8, <16 x i1> %mask)
ret void
}
-define void @masked_store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i16>* %bp, <32 x i8>* %dest) #0 {
+define void @masked_store_trunc_v32i16i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v32i16i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT: st1b { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %ap
- %b = load <32 x i16>, <32 x i16>* %bp
+ %a = load <32 x i16>, ptr %ap
+ %b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %a, %b
%val = trunc <32 x i16> %a to <32 x i8>
- call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %dest, i32 8, <32 x i1> %mask)
+ call void @llvm.masked.store.v32i8(<32 x i8> %val, ptr %dest, i32 8, <32 x i1> %mask)
ret void
}
-declare void @llvm.masked.store.v2f16(<2 x half>, <2 x half>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v32f32(<32 x float>, <32 x float>*, i32, <32 x i1>)
-declare void @llvm.masked.store.v64f32(<64 x float>, <64 x float>*, i32, <64 x i1>)
+declare void @llvm.masked.store.v2f16(<2 x half>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v2f32(<2 x float>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v4f32(<4 x float>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v8f32(<8 x float>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v16f32(<16 x float>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v32f32(<32 x float>, ptr, i32, <32 x i1>)
+declare void @llvm.masked.store.v64f32(<64 x float>, ptr, i32, <64 x i1>)
-declare void @llvm.masked.store.v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
+declare void @llvm.masked.store.v8i8(<8 x i8>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v8i16(<8 x i16>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v8i32(<8 x i32>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v16i8(<16 x i8>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v16i16(<16 x i16>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v32i8(<32 x i8>, ptr, i32, <32 x i1>)
attributes #0 = { "target-features"="+sve" }
target triple = "aarch64-unknown-linux-gnu"
-define void @add_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @add_v64i8(ptr %a, ptr %b) #0 {
; CHECK-LABEL: add_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%res = add <64 x i8> %op1, %op2
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @add_v32i16(<32 x i16>* %a, <32 x i16>* %b, <32 x i16>* %c) #0 {
+define void @add_v32i16(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-LABEL: add_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%res = add <32 x i16> %op1, %op2
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @abs_v16i32(<16 x i32>* %a) #0 {
+define void @abs_v16i32(ptr %a) #0 {
; CHECK-LABEL: abs_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%res = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %op1, i1 false)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @abs_v8i64(<8 x i64>* %a) #0 {
+define void @abs_v8i64(ptr %a) #0 {
; CHECK-LABEL: abs_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%res = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %op1, i1 false)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @fadd_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @fadd_v32f16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fadd_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fadd z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%res = fadd <32 x half> %op1, %op2
- store <32 x half> %res, <32 x half>* %a
+ store <32 x half> %res, ptr %a
ret void
}
-define void @fadd_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @fadd_v16f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fadd_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fadd z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%res = fadd <16 x float> %op1, %op2
- store <16 x float> %res, <16 x float>* %a
+ store <16 x float> %res, ptr %a
ret void
}
-define void @fadd_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @fadd_v8f64(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fadd_v8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fadd z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%res = fadd <8 x double> %op1, %op2
- store <8 x double> %res, <8 x double>* %a
+ store <8 x double> %res, ptr %a
ret void
}
target triple = "aarch64-unknown-linux-gnu"
; REVB pattern for shuffle v32i8 -> v16i16
-define void @test_revbv16i16(<32 x i8>* %a) #0 {
+define void @test_revbv16i16(ptr %a) #0 {
; CHECK-LABEL: test_revbv16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: revb z0.h, p1/m, z0.h
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <32 x i8>, <32 x i8>* %a
+ %tmp1 = load <32 x i8>, ptr %a
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 undef, i32 24, i32 27, i32 undef, i32 29, i32 28, i32 undef, i32 undef>
- store <32 x i8> %tmp2, <32 x i8>* %a
+ store <32 x i8> %tmp2, ptr %a
ret void
}
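; Here the mask <1, 0, 3, 2, ...> swaps adjacent byte pairs, i.e. it
; byte-reverses each i16 lane, which is exactly what "revb z0.h" performs
; on every halfword element.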
; REVB pattern for shuffle v32i8 -> v8i32
-define void @test_revbv8i32(<32 x i8>* %a) #0 {
+define void @test_revbv8i32(ptr %a) #0 {
; CHECK-LABEL: test_revbv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: revb z0.s, p1/m, z0.s
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <32 x i8>, <32 x i8>* %a
+ %tmp1 = load <32 x i8>, ptr %a
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
- store <32 x i8> %tmp2, <32 x i8>* %a
+ store <32 x i8> %tmp2, ptr %a
ret void
}
; REVB pattern for shuffle v32i8 -> v4i64
-define void @test_revbv4i64(<32 x i8>* %a) #0 {
+define void @test_revbv4i64(ptr %a) #0 {
; CHECK-LABEL: test_revbv4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: revb z0.d, p1/m, z0.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <32 x i8>, <32 x i8>* %a
+ %tmp1 = load <32 x i8>, ptr %a
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 31, i32 30, i32 29, i32 undef, i32 27, i32 undef, i32 undef, i32 undef>
- store <32 x i8> %tmp2, <32 x i8>* %a
+ store <32 x i8> %tmp2, ptr %a
ret void
}
; REVH pattern for shuffle v16i16 -> v8i32
-define void @test_revhv8i32(<16 x i16>* %a) #0 {
+define void @test_revhv8i32(ptr %a) #0 {
; CHECK-LABEL: test_revhv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: revh z0.s, p1/m, z0.s
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <16 x i16>, <16 x i16>* %a
+ %tmp1 = load <16 x i16>, ptr %a
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
- store <16 x i16> %tmp2, <16 x i16>* %a
+ store <16 x i16> %tmp2, ptr %a
ret void
}
; REVH pattern for shuffle v16f16 -> v8f32
-define void @test_revhv8f32(<16 x half>* %a) #0 {
+define void @test_revhv8f32(ptr %a) #0 {
; CHECK-LABEL: test_revhv8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: revh z0.s, p1/m, z0.s
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <16 x half>, <16 x half>* %a
+ %tmp1 = load <16 x half>, ptr %a
%tmp2 = shufflevector <16 x half> %tmp1, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
- store <16 x half> %tmp2, <16 x half>* %a
+ store <16 x half> %tmp2, ptr %a
ret void
}
; REVH pattern for shuffle v16i16 -> v4i64
-define void @test_revhv4i64(<16 x i16>* %a) #0 {
+define void @test_revhv4i64(ptr %a) #0 {
; CHECK-LABEL: test_revhv4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: revh z0.d, p1/m, z0.d
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <16 x i16>, <16 x i16>* %a
+ %tmp1 = load <16 x i16>, ptr %a
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
- store <16 x i16> %tmp2, <16 x i16>* %a
+ store <16 x i16> %tmp2, ptr %a
ret void
}
; REVW pattern for shuffle v8i32 -> v4i64
-define void @test_revwv4i64(<8 x i32>* %a) #0 {
+define void @test_revwv4i64(ptr %a) #0 {
; CHECK-LABEL: test_revwv4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: revw z0.d, p1/m, z0.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i32>, <8 x i32>* %a
+ %tmp1 = load <8 x i32>, ptr %a
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
- store <8 x i32> %tmp2, <8 x i32>* %a
+ store <8 x i32> %tmp2, ptr %a
ret void
}
; REVW pattern for shuffle v8f32 -> v4f64
-define void @test_revwv4f64(<8 x float>* %a) #0 {
+define void @test_revwv4f64(ptr %a) #0 {
; CHECK-LABEL: test_revwv4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: revw z0.d, p1/m, z0.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <8 x float>, <8 x float>* %a
+ %tmp1 = load <8 x float>, ptr %a
%tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
- store <8 x float> %tmp2, <8 x float>* %a
+ store <8 x float> %tmp2, ptr %a
ret void
}
; Don't use SVE for 128-bit vectors
-define <16 x i8> @test_revv16i8(<16 x i8>* %a) #0 {
+define <16 x i8> @test_revv16i8(ptr %a) #0 {
; CHECK-LABEL: test_revv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rev64 v0.16b, v0.16b
; CHECK-NEXT: ret
- %tmp1 = load <16 x i8>, <16 x i8>* %a
+ %tmp1 = load <16 x i8>, ptr %a
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
ret <16 x i8> %tmp2
}
; REVW pattern for shuffling two v8i32 inputs when the second input is available.
-define void @test_revwv8i32v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
+define void @test_revwv8i32v8i32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: test_revwv8i32v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: revw z0.d, p1/m, z0.d
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i32>, <8 x i32>* %a
- %tmp2 = load <8 x i32>, <8 x i32>* %b
+ %tmp1 = load <8 x i32>, ptr %a
+ %tmp2 = load <8 x i32>, ptr %b
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
- store <8 x i32> %tmp3, <8 x i32>* %a
+ store <8 x i32> %tmp3, ptr %a
ret void
}
; REVH pattern for a v32i16 shuffle with 256-bit and 512-bit SVE.
-define void @test_revhv32i16(<32 x i16>* %a) #0 {
+define void @test_revhv32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: test_revhv32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: revh z0.d, p1/m, z0.d
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %tmp1 = load <32 x i16>, <32 x i16>* %a
+ %tmp1 = load <32 x i16>, ptr %a
%tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
- store <32 x i16> %tmp2, <32 x i16>* %a
+ store <32 x i16> %tmp2, ptr %a
ret void
}
; Reversing is only supported for bytes / halfwords / words within elements.
-define void @test_rev_elts_fail(<4 x i64>* %a) #1 {
+define void @test_rev_elts_fail(ptr %a) #1 {
; CHECK-LABEL: test_rev_elts_fail:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
- %tmp1 = load <4 x i64>, <4 x i64>* %a
+ %tmp1 = load <4 x i64>, ptr %a
%tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- store <4 x i64> %tmp2, <4 x i64>* %a
+ store <4 x i64> %tmp2, ptr %a
ret void
}
; With sve-vector-bits-min=256 and sve-vector-bits-max unset, the exact
; register size is unknown, so a fixed-length <8 x i32> may occupy only the
; low portion of a larger register; a full-register REV would then be
; incorrect, so no REV instruction is generated.
-define void @test_revv8i32(<8 x i32>* %a) #0 {
+define void @test_revv8i32(ptr %a) #0 {
; CHECK-LABEL: test_revv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
- %tmp1 = load <8 x i32>, <8 x i32>* %a
+ %tmp1 = load <8 x i32>, ptr %a
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- store <8 x i32> %tmp2, <8 x i32>* %a
+ store <8 x i32> %tmp2, ptr %a
ret void
}
; REV pattern for v32i8 shuffle with vscale_range(2,2)
-define void @test_revv32i8_vl256(<32 x i8>* %a) #1 {
+define void @test_revv32i8_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv32i8_vl256:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: rev z0.b, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <32 x i8>, <32 x i8>* %a
+ %tmp1 = load <32 x i8>, ptr %a
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- store <32 x i8> %tmp2, <32 x i8>* %a
+ store <32 x i8> %tmp2, ptr %a
ret void
}
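; With vscale_range(2,2) the register size is known to be exactly 256 bits,
; so the <32 x i8> value fills the whole SVE register and a full-register
; "rev" is safe, unlike the min-only configuration rejected above.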
; REV pattern for v16i16 shuffle with vscale_range(2,2)
-define void @test_revv16i16_vl256(<16 x i16>* %a) #1 {
+define void @test_revv16i16_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv16i16_vl256:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: rev z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <16 x i16>, <16 x i16>* %a
+ %tmp1 = load <16 x i16>, ptr %a
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- store <16 x i16> %tmp2, <16 x i16>* %a
+ store <16 x i16> %tmp2, ptr %a
ret void
}
; REV pattern for v8f32 shuffle with vscale_range(2,2)
-define void @test_revv8f32_vl256(<8 x float>* %a) #1 {
+define void @test_revv8f32_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv8f32_vl256:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: rev z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <8 x float>, <8 x float>* %a
+ %tmp1 = load <8 x float>, ptr %a
%tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- store <8 x float> %tmp2, <8 x float>* %a
+ store <8 x float> %tmp2, ptr %a
ret void
}
; REV pattern for v4f64 shuffle with vscale_range(2,2)
-define void @test_revv4f64_vl256(<4 x double>* %a) #1 {
+define void @test_revv4f64_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv4f64_vl256:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: rev z0.d, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <4 x double>, <4 x double>* %a
+ %tmp1 = load <4 x double>, ptr %a
%tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- store <4 x double> %tmp2, <4 x double>* %a
+ store <4 x double> %tmp2, ptr %a
ret void
}
; REV pattern for a shuffle of two v8i32 inputs with the second input available, vscale_range(2,2).
-define void @test_revv8i32v8i32(<8 x i32>* %a, <8 x i32>* %b) #1 {
+define void @test_revv8i32v8i32(ptr %a, ptr %b) #1 {
; CHECK-LABEL: test_revv8i32v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: rev z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i32>, <8 x i32>* %a
- %tmp2 = load <8 x i32>, <8 x i32>* %b
+ %tmp1 = load <8 x i32>, ptr %a
+ %tmp2 = load <8 x i32>, ptr %b
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
- store <8 x i32> %tmp3, <8 x i32>* %a
+ store <8 x i32> %tmp3, ptr %a
ret void
}
; Illegal REV pattern.
-define void @test_rev_fail(<16 x i16>* %a) #1 {
+define void @test_rev_fail(ptr %a) #1 {
; CHECK-LABEL: test_rev_fail:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
- %tmp1 = load <16 x i16>, <16 x i16>* %a
+ %tmp1 = load <16 x i16>, ptr %a
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
- store <16 x i16> %tmp2, <16 x i16>* %a
+ store <16 x i16> %tmp2, ptr %a
ret void
}
; Don't use SVE for 128-bit shuffle with two inputs
-define void @test_revv8i16v8i16(<8 x i16>* %a, <8 x i16>* %b, <16 x i16>* %c) #1 {
+define void @test_revv8i16v8i16(ptr %a, ptr %b, ptr %c) #1 {
; CHECK-LABEL: test_revv8i16v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
- %tmp1 = load <8 x i16>, <8 x i16>* %a
- %tmp2 = load <8 x i16>, <8 x i16>* %b
+ %tmp1 = load <8 x i16>, ptr %a
+ %tmp2 = load <8 x i16>, ptr %b
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
- store <16 x i16> %tmp3, <16 x i16>* %c
+ store <16 x i16> %tmp3, ptr %c
ret void
}
target triple = "aarch64-unknown-linux-gnu"
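; The tests below cover fixed-length ZIP/UZP/TRN shuffle lowering. The
; VBITS_EQ_256 and VBITS_EQ_512 prefixes correspond to runs where the SVE
; register size is pinned to exactly 256 or 512 bits.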
-define void @zip1_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
+define void @zip1_v32i8(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: zip1_v32i8:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.b
; VBITS_EQ_512-NEXT: zip1 z0.b, z0.b, z1.b
; VBITS_EQ_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load volatile <32 x i8>, <32 x i8>* %a
- %tmp2 = load volatile <32 x i8>, <32 x i8>* %b
+ %tmp1 = load volatile <32 x i8>, ptr %a
+ %tmp2 = load volatile <32 x i8>, ptr %b
%tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
- store volatile <32 x i8> %tmp3, <32 x i8>* %a
+ store volatile <32 x i8> %tmp3, ptr %a
ret void
}
-define void @zip_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @zip_v32i16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: zip_v32i16:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #16
; VBITS_EQ_512-NEXT: add z0.h, z2.h, z0.h
; VBITS_EQ_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load <32 x i16>, <32 x i16>* %a
- %tmp2 = load <32 x i16>, <32 x i16>* %b
+ %tmp1 = load <32 x i16>, ptr %a
+ %tmp2 = load <32 x i16>, ptr %b
%tmp3 = shufflevector <32 x i16> %tmp1, <32 x i16> %tmp2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
%tmp4 = shufflevector <32 x i16> %tmp1, <32 x i16> %tmp2, <32 x i32> <i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
%tmp5 = add <32 x i16> %tmp3, %tmp4
- store <32 x i16> %tmp5, <32 x i16>* %a
+ store <32 x i16> %tmp5, ptr %a
ret void
}
-define void @zip1_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
+define void @zip1_v16i16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: zip1_v16i16:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.h
; VBITS_EQ_512-NEXT: zip1 z0.h, z0.h, z1.h
; VBITS_EQ_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load volatile <16 x i16>, <16 x i16>* %a
- %tmp2 = load volatile <16 x i16>, <16 x i16>* %b
+ %tmp1 = load volatile <16 x i16>, ptr %a
+ %tmp2 = load volatile <16 x i16>, ptr %b
%tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
- store volatile <16 x i16> %tmp3, <16 x i16>* %a
+ store volatile <16 x i16> %tmp3, ptr %a
ret void
}
-define void @zip1_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
+define void @zip1_v8i32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: zip1_v8i32:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.s
; VBITS_EQ_512-NEXT: zip1 z0.s, z0.s, z1.s
; VBITS_EQ_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load volatile <8 x i32>, <8 x i32>* %a
- %tmp2 = load volatile <8 x i32>, <8 x i32>* %b
+ %tmp1 = load volatile <8 x i32>, ptr %a
+ %tmp2 = load volatile <8 x i32>, ptr %b
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
- store volatile <8 x i32> %tmp3, <8 x i32>* %a
+ store volatile <8 x i32> %tmp3, ptr %a
ret void
}
-define void @zip_v4f64(<4 x double>* %a, <4 x double>* %b) #0 {
+define void @zip_v4f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: zip_v4f64:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.d
; VBITS_EQ_512-NEXT: mov sp, x29
; VBITS_EQ_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load <4 x double>, <4 x double>* %a
- %tmp2 = load <4 x double>, <4 x double>* %b
+ %tmp1 = load <4 x double>, ptr %a
+ %tmp2 = load <4 x double>, ptr %b
%tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp5 = fadd <4 x double> %tmp3, %tmp4
- store <4 x double> %tmp5, <4 x double>* %a
+ store <4 x double> %tmp5, ptr %a
ret void
}
; Don't use SVE for 128-bit vectors
-define void @zip_v4i32(<4 x i32>* %a, <4 x i32>* %b) #0 {
+define void @zip_v4i32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: zip_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i32>, <4 x i32>* %a
- %tmp2 = load <4 x i32>, <4 x i32>* %b
+ %tmp1 = load <4 x i32>, ptr %a
+ %tmp2 = load <4 x i32>, ptr %b
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp5 = add <4 x i32> %tmp3, %tmp4
- store <4 x i32> %tmp5, <4 x i32>* %a
+ store <4 x i32> %tmp5, ptr %a
ret void
}
-define void @zip1_v8i32_undef(<8 x i32>* %a) #0 {
+define void @zip1_v8i32_undef(ptr %a) #0 {
; VBITS_EQ_256-LABEL: zip1_v8i32_undef:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.s
; VBITS_EQ_512-NEXT: zip1 z0.s, z0.s, z0.s
; VBITS_EQ_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load volatile <8 x i32>, <8 x i32>* %a
+ %tmp1 = load volatile <8 x i32>, ptr %a
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
- store volatile <8 x i32> %tmp2, <8 x i32>* %a
+ store volatile <8 x i32> %tmp2, ptr %a
ret void
}
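; The trn tests below interleave the even lanes (first shuffle) and odd lanes
; (second shuffle) of two inputs, matching the TRN1/TRN2 transpose patterns,
; then add the two results.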
-define void @trn_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
+define void @trn_v32i8(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: trn_v32i8:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.b
; VBITS_EQ_512-NEXT: add z0.b, z2.b, z0.b
; VBITS_EQ_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load <32 x i8>, <32 x i8>* %a
- %tmp2 = load <32 x i8>, <32 x i8>* %b
+ %tmp1 = load <32 x i8>, ptr %a
+ %tmp2 = load <32 x i8>, ptr %b
%tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
%tmp4 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 1, i32 33, i32 3, i32 35, i32 undef, i32 37, i32 7, i32 undef, i32 undef, i32 41, i32 11, i32 43, i32 13, i32 45, i32 15, i32 47, i32 17, i32 49, i32 19, i32 51, i32 21, i32 53, i32 23, i32 55, i32 25, i32 57, i32 27, i32 59, i32 29, i32 61, i32 31, i32 63>
%tmp5 = add <32 x i8> %tmp3, %tmp4
- store <32 x i8> %tmp5, <32 x i8>* %a
+ store <32 x i8> %tmp5, ptr %a
ret void
}
-define void @trn_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @trn_v32i16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: trn_v32i16:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: mov x8, #16
; VBITS_EQ_512-NEXT: add z0.h, z2.h, z0.h
; VBITS_EQ_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load <32 x i16>, <32 x i16>* %a
- %tmp2 = load <32 x i16>, <32 x i16>* %b
+ %tmp1 = load <32 x i16>, ptr %a
+ %tmp2 = load <32 x i16>, ptr %b
%tmp3 = shufflevector <32 x i16> %tmp1, <32 x i16> %tmp2, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
%tmp4 = shufflevector <32 x i16> %tmp1, <32 x i16> %tmp2, <32 x i32> <i32 1, i32 33, i32 3, i32 35, i32 undef, i32 37, i32 7, i32 undef, i32 undef, i32 41, i32 11, i32 43, i32 13, i32 45, i32 15, i32 47, i32 17, i32 49, i32 19, i32 51, i32 21, i32 53, i32 23, i32 55, i32 25, i32 57, i32 27, i32 59, i32 29, i32 61, i32 31, i32 63>
%tmp5 = add <32 x i16> %tmp3, %tmp4
- store <32 x i16> %tmp5, <32 x i16>* %a
+ store <32 x i16> %tmp5, ptr %a
ret void
}
-define void @trn_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 {
+define void @trn_v16i16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: trn_v16i16:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.h
; VBITS_EQ_512-NEXT: add z0.h, z2.h, z0.h
; VBITS_EQ_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load <16 x i16>, <16 x i16>* %a
- %tmp2 = load <16 x i16>, <16 x i16>* %b
+ %tmp1 = load <16 x i16>, ptr %a
+ %tmp2 = load <16 x i16>, ptr %b
%tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
%tmp4 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
%tmp5 = add <16 x i16> %tmp3, %tmp4
- store <16 x i16> %tmp5, <16 x i16>* %a
+ store <16 x i16> %tmp5, ptr %a
ret void
}
-define void @trn_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 {
+define void @trn_v8i32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: trn_v8i32:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.s
; VBITS_EQ_512-NEXT: add z0.s, z2.s, z0.s
; VBITS_EQ_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load <8 x i32>, <8 x i32>* %a
- %tmp2 = load <8 x i32>, <8 x i32>* %b
+ %tmp1 = load <8 x i32>, ptr %a
+ %tmp2 = load <8 x i32>, ptr %b
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
%tmp4 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
%tmp5 = add <8 x i32> %tmp3, %tmp4
- store <8 x i32> %tmp5, <8 x i32>* %a
+ store <8 x i32> %tmp5, ptr %a
ret void
}
-define void @trn_v4f64(<4 x double>* %a, <4 x double>* %b) #0 {
+define void @trn_v4f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: trn_v4f64:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.d
; VBITS_EQ_512-NEXT: fadd z0.d, p0/m, z0.d, z2.d
; VBITS_EQ_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load <4 x double>, <4 x double>* %a
- %tmp2 = load <4 x double>, <4 x double>* %b
+ %tmp1 = load <4 x double>, ptr %a
+ %tmp2 = load <4 x double>, ptr %b
%tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
%tmp4 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
%tmp5 = fadd <4 x double> %tmp3, %tmp4
- store <4 x double> %tmp5, <4 x double>* %a
+ store <4 x double> %tmp5, ptr %a
ret void
}
; Don't use SVE for 128-bit vectors
-define void @trn_v4f32(<4 x float>* %a, <4 x float>* %b) #0 {
+define void @trn_v4f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: trn_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <4 x float>, <4 x float>* %a
- %tmp2 = load <4 x float>, <4 x float>* %b
+ %tmp1 = load <4 x float>, ptr %a
+ %tmp2 = load <4 x float>, ptr %b
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
%tmp5 = fadd <4 x float> %tmp3, %tmp4
- store <4 x float> %tmp5, <4 x float>* %a
+ store <4 x float> %tmp5, ptr %a
ret void
}
-define void @trn_v8i32_undef(<8 x i32>* %a) #0 {
+define void @trn_v8i32_undef(ptr %a) #0 {
; VBITS_EQ_256-LABEL: trn_v8i32_undef:
; VBITS_EQ_256: // %bb.0:
; VBITS_EQ_256-NEXT: ptrue p0.s
; VBITS_EQ_512-NEXT: add z0.s, z1.s, z0.s
; VBITS_EQ_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_EQ_512-NEXT: ret
- %tmp1 = load <8 x i32>, <8 x i32>* %a
+ %tmp1 = load <8 x i32>, ptr %a
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%tmp4 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
%tmp5 = add <8 x i32> %tmp3, %tmp4
- store <8 x i32> %tmp5, <8 x i32>* %a
+ store <8 x i32> %tmp5, ptr %a
ret void
}
; Emit zip2 instruction for v32i8 shuffle with vscale_range(2,2),
; since the size of v32i8 is the same as the runtime vector length.
-define void @zip2_v32i8(<32 x i8>* %a, <32 x i8>* %b) #1 {
+define void @zip2_v32i8(ptr %a, ptr %b) #1 {
; CHECK-LABEL: zip2_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: zip2 z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load volatile <32 x i8>, <32 x i8>* %a
- %tmp2 = load volatile <32 x i8>, <32 x i8>* %b
+ %tmp1 = load volatile <32 x i8>, ptr %a
+ %tmp2 = load volatile <32 x i8>, ptr %b
%tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
- store volatile <32 x i8> %tmp3, <32 x i8>* %a
+ store volatile <32 x i8> %tmp3, ptr %a
ret void
}
; Emit zip2 instruction for v16i16 shuffle with vscale_range(2,2),
; since the size of v16i16 is the same as the runtime vector length.
-define void @zip2_v16i16(<16 x i16>* %a, <16 x i16>* %b) #1 {
+define void @zip2_v16i16(ptr %a, ptr %b) #1 {
; CHECK-LABEL: zip2_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load volatile <16 x i16>, <16 x i16>* %a
- %tmp2 = load volatile <16 x i16>, <16 x i16>* %b
+ %tmp1 = load volatile <16 x i16>, ptr %a
+ %tmp2 = load volatile <16 x i16>, ptr %b
%tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
- store volatile <16 x i16> %tmp3, <16 x i16>* %a
+ store volatile <16 x i16> %tmp3, ptr %a
ret void
}
; Emit zip2 instruction for v8i32 shuffle with vscale_range(2,2),
; since the size of v8i32 is the same as the runtime vector length.
-define void @zip2_v8i32(<8 x i32>* %a, <8 x i32>* %b) #1 {
+define void @zip2_v8i32(ptr %a, ptr %b) #1 {
; CHECK-LABEL: zip2_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load volatile <8 x i32>, <8 x i32>* %a
- %tmp2 = load volatile <8 x i32>, <8 x i32>* %b
+ %tmp1 = load volatile <8 x i32>, ptr %a
+ %tmp2 = load volatile <8 x i32>, ptr %b
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
- store volatile <8 x i32> %tmp3, <8 x i32>* %a
+ store volatile <8 x i32> %tmp3, ptr %a
ret void
}
; Emit zip2 instruction for v8i32 and undef shuffle with vscale_range(2,2)
-define void @zip2_v8i32_undef(<8 x i32>* %a) #1 {
+define void @zip2_v8i32_undef(ptr %a) #1 {
; CHECK-LABEL: zip2_v8i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: zip2 z0.s, z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load volatile <8 x i32>, <8 x i32>* %a
+ %tmp1 = load volatile <8 x i32>, ptr %a
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
- store volatile <8 x i32> %tmp2, <8 x i32>* %a
+ store volatile <8 x i32> %tmp2, ptr %a
ret void
}
; Emit uzp1/2 instruction for v32i8 shuffle with vscale_range(2,2),
; since the size of v32i8 is the same as the runtime vector length.
-define void @uzp_v32i8(<32 x i8>* %a, <32 x i8>* %b) #1 {
+define void @uzp_v32i8(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add z0.b, z2.b, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <32 x i8>, <32 x i8>* %a
- %tmp2 = load <32 x i8>, <32 x i8>* %b
+ %tmp1 = load <32 x i8>, ptr %a
+ %tmp2 = load <32 x i8>, ptr %b
%tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
%tmp4 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 9, i32 11, i32 13, i32 undef, i32 undef, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
%tmp5 = add <32 x i8> %tmp3, %tmp4
- store <32 x i8> %tmp5, <32 x i8>* %a
+ store <32 x i8> %tmp5, ptr %a
ret void
}
; Emit uzp1/2 instructions for the v32i16 shuffle with vscale_range(2,2);
; v32i16 is split into two v16i16 halves, and the size of v16i16 is
; the same as the runtime vector length.
-define void @uzp_v32i16(<32 x i16>* %a, <32 x i16>* %b) #1 {
+define void @uzp_v32i16(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #16
; CHECK-NEXT: st1h { z1.h }, p0, [x0, x8, lsl #1]
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <32 x i16>, <32 x i16>* %a
- %tmp2 = load <32 x i16>, <32 x i16>* %b
+ %tmp1 = load <32 x i16>, ptr %a
+ %tmp2 = load <32 x i16>, ptr %b
%tmp3 = shufflevector <32 x i16> %tmp1, <32 x i16> %tmp2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
%tmp4 = shufflevector <32 x i16> %tmp1, <32 x i16> %tmp2, <32 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 9, i32 11, i32 13, i32 undef, i32 undef, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
%tmp5 = add <32 x i16> %tmp3, %tmp4
- store <32 x i16> %tmp5, <32 x i16>* %a
+ store <32 x i16> %tmp5, ptr %a
ret void
}
; Emit uzp1/2 instruction for v16i16 shuffle with vscale_range(2,2),
; since the size of v16i16 is the same as the runtime vector length.
-define void @uzp_v16i16(<16 x i16>* %a, <16 x i16>* %b) #1 {
+define void @uzp_v16i16(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add z0.h, z2.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <16 x i16>, <16 x i16>* %a
- %tmp2 = load <16 x i16>, <16 x i16>* %b
+ %tmp1 = load <16 x i16>, ptr %a
+ %tmp2 = load <16 x i16>, ptr %b
%tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
%tmp4 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
%tmp5 = add <16 x i16> %tmp3, %tmp4
- store <16 x i16> %tmp5, <16 x i16>* %a
+ store <16 x i16> %tmp5, ptr %a
ret void
}
; Emit uzp1/2 instruction for v8f32 shuffle with vscale_range(2,2),
; since the size of v8f32 is the same as the runtime vector length.
-define void @uzp_v8f32(<8 x float>* %a, <8 x float>* %b) #1 {
+define void @uzp_v8f32(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fadd z0.s, z2.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <8 x float>, <8 x float>* %a
- %tmp2 = load <8 x float>, <8 x float>* %b
+ %tmp1 = load <8 x float>, ptr %a
+ %tmp2 = load <8 x float>, ptr %b
%tmp3 = shufflevector <8 x float> %tmp1, <8 x float> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 6, i32 undef, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x float> %tmp1, <8 x float> %tmp2, <8 x i32> <i32 1, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 undef, i32 undef>
%tmp5 = fadd <8 x float> %tmp3, %tmp4
- store <8 x float> %tmp5, <8 x float>* %a
+ store <8 x float> %tmp5, ptr %a
ret void
}
; Emit uzp1/2 instruction for v4i64 shuffle with vscale_range(2,2),
; since the size of v4i64 is the same as the runtime vector length.
-define void @uzp_v4i64(<4 x i64>* %a, <4 x i64>* %b) #1 {
+define void @uzp_v4i64(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add z0.d, z2.d, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <4 x i64>, <4 x i64>* %a
- %tmp2 = load <4 x i64>, <4 x i64>* %b
+ %tmp1 = load <4 x i64>, ptr %a
+ %tmp2 = load <4 x i64>, ptr %b
%tmp3 = shufflevector <4 x i64> %tmp1, <4 x i64> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <4 x i64> %tmp1, <4 x i64> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%tmp5 = add <4 x i64> %tmp3, %tmp4
- store <4 x i64> %tmp5, <4 x i64>* %a
+ store <4 x i64> %tmp5, ptr %a
ret void
}
; Don't use SVE for 128-bit vectors
-define void @uzp_v8i16(<8 x i16>* %a, <8 x i16>* %b) #1 {
+define void @uzp_v8i16(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: add v0.8h, v2.8h, v0.8h
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i16>, <8 x i16>* %a
- %tmp2 = load <8 x i16>, <8 x i16>* %b
+ %tmp1 = load <8 x i16>, ptr %a
+ %tmp2 = load <8 x i16>, ptr %b
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%tmp5 = add <8 x i16> %tmp3, %tmp4
- store <8 x i16> %tmp5, <8 x i16>* %a
+ store <8 x i16> %tmp5, ptr %a
ret void
}
; Emit uzp1/2 instruction for v8i32 and undef shuffle with vscale_range(2,2)
-define void @uzp_v8i32_undef(<8 x i32>* %a) #1 {
+define void @uzp_v8i32_undef(ptr %a) #1 {
; CHECK-LABEL: uzp_v8i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: add z0.s, z1.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %tmp1 = load <8 x i32>, <8 x i32>* %a
+ %tmp1 = load <8 x i32>, ptr %a
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 1, i32 3, i32 5, i32 7>
%tmp5 = add <8 x i32> %tmp3, %tmp4
- store <8 x i32> %tmp5, <8 x i32>* %a
+ store <8 x i32> %tmp5, ptr %a
ret void
}
; Only zip1 can be emitted safely with vscale_range(2,4).
; vscale_range(2,4) allows different min/max vector sizes, and zip2 relies on
; knowing which indices represent the high half of the SVE vector register.
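; For example, a <4 x double> occupies the low 256 bits of a z register that
; vscale_range(2,4) allows to be either 256 or 512 bits wide, so the "high
; half" that zip2 reads maps to different fixed-vector elements depending on
; the runtime vector length.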
-define void @zip_vscale2_4(<4 x double>* %a, <4 x double>* %b) #2 {
+define void @zip_vscale2_4(ptr %a, ptr %b) #2 {
; CHECK-LABEL: zip_vscale2_4:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
- %tmp1 = load <4 x double>, <4 x double>* %a
- %tmp2 = load <4 x double>, <4 x double>* %b
+ %tmp1 = load <4 x double>, ptr %a
+ %tmp2 = load <4 x double>, ptr %b
%tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp5 = fadd <4 x double> %tmp3, %tmp4
- store <4 x double> %tmp5, <4 x double>* %a
+ store <4 x double> %tmp5, ptr %a
ret void
}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s
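; The ptest_* tests below reduce a <16 x float> compare to a single i1 via
; llvm.vector.reduce.or/and, varying the vscale_range bounds to exercise both
; the minimum-only and the exact vector-length cases.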
-define i1 @ptest_v16i1_256bit_min_sve(float* %a, float * %b) vscale_range(2, 0) {
+define i1 @ptest_v16i1_256bit_min_sve(ptr %a, ptr %b) vscale_range(2, 0) {
; CHECK-LABEL: ptest_v16i1_256bit_min_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #8
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
- %v0 = bitcast float* %a to <16 x float>*
- %v1 = load <16 x float>, <16 x float>* %v0, align 4
+ %v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
ret i1 %v3
}
-define i1 @ptest_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0) {
+define i1 @ptest_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_v16i1_512bit_min_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
- %v0 = bitcast float* %a to <16 x float>*
- %v1 = load <16 x float>, <16 x float>* %v0, align 4
+ %v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
ret i1 %v3
}
-define i1 @ptest_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4) {
+define i1 @ptest_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_v16i1_512bit_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
- %v0 = bitcast float* %a to <16 x float>*
- %v1 = load <16 x float>, <16 x float>* %v0, align 4
+ %v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
ret i1 %v3
}
-define i1 @ptest_or_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0) {
+define i1 @ptest_or_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_or_v16i1_512bit_min_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
- %v0 = bitcast float* %a to <16 x float>*
- %v1 = load <16 x float>, <16 x float>* %v0, align 4
+ %v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
- %v3 = bitcast float* %b to <16 x float>*
- %v4 = load <16 x float>, <16 x float>* %v3, align 4
+ %v4 = load <16 x float>, ptr %b, align 4
%v5 = fcmp une <16 x float> %v4, zeroinitializer
%v6 = or <16 x i1> %v2, %v5
%v7 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v6)
; AND reduction.
;
-define i1 @ptest_and_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4) {
+define i1 @ptest_and_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_and_v16i1_512bit_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
- %v0 = bitcast float* %a to <16 x float>*
- %v1 = load <16 x float>, <16 x float>* %v0, align 4
+ %v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
- %v3 = bitcast float* %b to <16 x float>*
- %v4 = load <16 x float>, <16 x float>* %v3, align 4
+ %v4 = load <16 x float>, ptr %b, align 4
%v5 = fcmp une <16 x float> %v4, zeroinitializer
%v6 = and <16 x i1> %v2, %v5
%v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
ret i1 %v7
}
-define i1 @ptest_and_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0) {
+define i1 @ptest_and_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_and_v16i1_512bit_min_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
- %v0 = bitcast float* %a to <16 x float>*
- %v1 = load <16 x float>, <16 x float>* %v0, align 4
+ %v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
- %v3 = bitcast float* %b to <16 x float>*
- %v4 = load <16 x float>, <16 x float>* %v3, align 4
+ %v4 = load <16 x float>, ptr %b, align 4
%v5 = fcmp une <16 x float> %v4, zeroinitializer
%v6 = and <16 x i1> %v2, %v5
%v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
ret <16 x i8> %res
}
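; Element-wise llvm.bitreverse is expected to select the predicated SVE RBIT
; instruction, as the rbit lines in the checks below show.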
-define void @bitreverse_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define void @bitreverse_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: bitreverse_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i8>, <32 x i8>* %a
+ %op = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %op)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @bitreverse_v64i8(<64 x i8>* %a) #0 {
+define void @bitreverse_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: bitreverse_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: rbit z0.b, p0/m, z0.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <64 x i8>, <64 x i8>* %a
+ %op = load <64 x i8>, ptr %a
%res = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %op)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @bitreverse_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define void @bitreverse_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: bitreverse_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x i8>, <128 x i8>* %a
+ %op = load <128 x i8>, ptr %a
%res = call <128 x i8> @llvm.bitreverse.v128i8(<128 x i8> %op)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @bitreverse_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define void @bitreverse_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: bitreverse_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <256 x i8>, <256 x i8>* %a
+ %op = load <256 x i8>, ptr %a
%res = call <256 x i8> @llvm.bitreverse.v256i8(<256 x i8> %op)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @bitreverse_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define void @bitreverse_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: bitreverse_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %op)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @bitreverse_v32i16(<32 x i16>* %a) #0 {
+define void @bitreverse_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: bitreverse_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: rbit z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %op)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @bitreverse_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define void @bitreverse_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: bitreverse_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call <64 x i16> @llvm.bitreverse.v64i16(<64 x i16> %op)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @bitreverse_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define void @bitreverse_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: bitreverse_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call <128 x i16> @llvm.bitreverse.v128i16(<128 x i16> %op)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @bitreverse_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define void @bitreverse_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: bitreverse_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %op)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @bitreverse_v16i32(<16 x i32>* %a) #0 {
+define void @bitreverse_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: bitreverse_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: rbit z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %op)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @bitreverse_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define void @bitreverse_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: bitreverse_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call <32 x i32> @llvm.bitreverse.v32i32(<32 x i32> %op)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @bitreverse_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define void @bitreverse_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: bitreverse_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call <64 x i32> @llvm.bitreverse.v64i32(<64 x i32> %op)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @bitreverse_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define void @bitreverse_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: bitreverse_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %op)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @bitreverse_v8i64(<8 x i64>* %a) #0 {
+define void @bitreverse_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: bitreverse_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: rbit z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %op)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @bitreverse_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define void @bitreverse_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: bitreverse_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> %op)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @bitreverse_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define void @bitreverse_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: bitreverse_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call <32 x i64> @llvm.bitreverse.v32i64(<32 x i64> %op)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
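; llvm.bswap reverses the bytes within each element and selects the predicated
; SVE REVB instruction below. As a scalar illustration (a hypothetical helper,
; not part of the generated checks), byte-swapping an i16 reduces to:
define i16 @bswap16_sketch(i16 %x) {
  %hi = shl i16 %x, 8   ; move the low byte to the high position
  %lo = lshr i16 %x, 8  ; move the high byte to the low position
  %res = or i16 %hi, %lo
  ret i16 %res
}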
-define void @bswap_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define void @bswap_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: bswap_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: revb z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i16>, <16 x i16>* %a
+ %op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %op)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @bswap_v32i16(<32 x i16>* %a) #0 {
+define void @bswap_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: bswap_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: revb z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <32 x i16>, <32 x i16>* %a
+ %op = load <32 x i16>, ptr %a
%res = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %op)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @bswap_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define void @bswap_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: bswap_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: revb z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i16>, <64 x i16>* %a
+ %op = load <64 x i16>, ptr %a
%res = call <64 x i16> @llvm.bswap.v64i16(<64 x i16> %op)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @bswap_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define void @bswap_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: bswap_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: revb z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <128 x i16>, <128 x i16>* %a
+ %op = load <128 x i16>, ptr %a
%res = call <128 x i16> @llvm.bswap.v128i16(<128 x i16> %op)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @bswap_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define void @bswap_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: bswap_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <8 x i32>, <8 x i32>* %a
+ %op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @bswap_v16i32(<16 x i32>* %a) #0 {
+define void @bswap_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: bswap_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: revb z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <16 x i32>, <16 x i32>* %a
+ %op = load <16 x i32>, ptr %a
%res = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %op)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @bswap_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define void @bswap_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: bswap_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i32>, <32 x i32>* %a
+ %op = load <32 x i32>, ptr %a
%res = call <32 x i32> @llvm.bswap.v32i32(<32 x i32> %op)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @bswap_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define void @bswap_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: bswap_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <64 x i32>, <64 x i32>* %a
+ %op = load <64 x i32>, ptr %a
%res = call <64 x i32> @llvm.bswap.v64i32(<64 x i32> %op)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @bswap_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define void @bswap_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: bswap_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <4 x i64>, <4 x i64>* %a
+ %op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @bswap_v8i64(<8 x i64>* %a) #0 {
+define void @bswap_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: bswap_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: revb z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op = load <8 x i64>, <8 x i64>* %a
+ %op = load <8 x i64>, ptr %a
%res = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %op)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @bswap_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define void @bswap_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: bswap_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <16 x i64>, <16 x i64>* %a
+ %op = load <16 x i64>, ptr %a
%res = call <16 x i64> @llvm.bswap.v16i64(<16 x i64> %op)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @bswap_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define void @bswap_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: bswap_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op = load <32 x i64>, <32 x i64>* %a
+ %op = load <32 x i64>, ptr %a
%res = call <32 x i64> @llvm.bswap.v32i64(<32 x i64> %op)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
ret <16 x i8> %res
}
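; The sdiv tests below divide by a splat of 32 (2^5) and are expected to lower
; to the predicated SVE ASRD (arithmetic shift right for divide) instruction
; with shift #5. As a scalar sketch of the rounding ASRD performs (a
; hypothetical helper, not part of the generated checks): exact signed
; division by 32 biases negative inputs by 31 before the arithmetic shift so
; the quotient rounds toward zero rather than toward negative infinity.
define i8 @sdiv_by_32_sketch(i8 %x) {
  %sign = ashr i8 %x, 7     ; 0 for non-negative x, -1 for negative x
  %bias = lshr i8 %sign, 3  ; 0 or 31 (2^5 - 1)
  %biased = add i8 %x, %bias
  %res = ashr i8 %biased, 5 ; equals sdiv i8 %x, 32 for all inputs
  ret i8 %res
}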
-define void @sdiv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
+define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
+ %op1 = load <32 x i8>, ptr %a
%res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer)
- store <32 x i8> %res, <32 x i8>* %a
+ store <32 x i8> %res, ptr %a
ret void
}
-define void @sdiv_v64i8(<64 x i8>* %a) #0 {
+define void @sdiv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: sdiv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: asrd z0.b, p0/m, z0.b, #5
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
+ %op1 = load <64 x i8>, ptr %a
%res = sdiv <64 x i8> %op1, shufflevector (<64 x i8> insertelement (<64 x i8> poison, i8 32, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer)
- store <64 x i8> %res, <64 x i8>* %a
+ store <64 x i8> %res, ptr %a
ret void
}
-define void @sdiv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
+define void @sdiv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
+ %op1 = load <128 x i8>, ptr %a
%res = sdiv <128 x i8> %op1, shufflevector (<128 x i8> insertelement (<128 x i8> poison, i8 32, i32 0), <128 x i8> poison, <128 x i32> zeroinitializer)
- store <128 x i8> %res, <128 x i8>* %a
+ store <128 x i8> %res, ptr %a
ret void
}
-define void @sdiv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
+define void @sdiv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
+ %op1 = load <256 x i8>, ptr %a
%res = sdiv <256 x i8> %op1, shufflevector (<256 x i8> insertelement (<256 x i8> poison, i8 32, i32 0), <256 x i8> poison, <256 x i32> zeroinitializer)
- store <256 x i8> %res, <256 x i8>* %a
+ store <256 x i8> %res, ptr %a
ret void
}
ret <8 x i16> %res
}
-define void @sdiv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
+define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
+ %op1 = load <16 x i16>, ptr %a
%res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer)
- store <16 x i16> %res, <16 x i16>* %a
+ store <16 x i16> %res, ptr %a
ret void
}
-define void @sdiv_v32i16(<32 x i16>* %a) #0 {
+define void @sdiv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: sdiv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: asrd z0.h, p0/m, z0.h, #5
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
+ %op1 = load <32 x i16>, ptr %a
%res = sdiv <32 x i16> %op1, shufflevector (<32 x i16> insertelement (<32 x i16> poison, i16 32, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer)
- store <32 x i16> %res, <32 x i16>* %a
+ store <32 x i16> %res, ptr %a
ret void
}
-define void @sdiv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
+define void @sdiv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
+ %op1 = load <64 x i16>, ptr %a
%res = sdiv <64 x i16> %op1, shufflevector (<64 x i16> insertelement (<64 x i16> poison, i16 32, i32 0), <64 x i16> poison, <64 x i32> zeroinitializer)
- store <64 x i16> %res, <64 x i16>* %a
+ store <64 x i16> %res, ptr %a
ret void
}
-define void @sdiv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
+define void @sdiv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
+ %op1 = load <128 x i16>, ptr %a
%res = sdiv <128 x i16> %op1, shufflevector (<128 x i16> insertelement (<128 x i16> poison, i16 32, i32 0), <128 x i16> poison, <128 x i32> zeroinitializer)
- store <128 x i16> %res, <128 x i16>* %a
+ store <128 x i16> %res, ptr %a
ret void
}
ret <4 x i32> %res
}
-define void @sdiv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
+define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
+ %op1 = load <8 x i32>, ptr %a
%res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 32, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)
- store <8 x i32> %res, <8 x i32>* %a
+ store <8 x i32> %res, ptr %a
ret void
}
-define void @sdiv_v16i32(<16 x i32>* %a) #0 {
+define void @sdiv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: sdiv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: asrd z0.s, p0/m, z0.s, #5
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
+ %op1 = load <16 x i32>, ptr %a
%res = sdiv <16 x i32> %op1, shufflevector (<16 x i32> insertelement (<16 x i32> poison, i32 32, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)
- store <16 x i32> %res, <16 x i32>* %a
+ store <16 x i32> %res, ptr %a
ret void
}
-define void @sdiv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
+define void @sdiv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
+ %op1 = load <32 x i32>, ptr %a
%res = sdiv <32 x i32> %op1, shufflevector (<32 x i32> insertelement (<32 x i32> poison, i32 32, i32 0), <32 x i32> poison, <32 x i32> zeroinitializer)
- store <32 x i32> %res, <32 x i32>* %a
+ store <32 x i32> %res, ptr %a
ret void
}
-define void @sdiv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
+define void @sdiv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
+ %op1 = load <64 x i32>, ptr %a
%res = sdiv <64 x i32> %op1, shufflevector (<64 x i32> insertelement (<64 x i32> poison, i32 32, i32 0), <64 x i32> poison, <64 x i32> zeroinitializer)
- store <64 x i32> %res, <64 x i32>* %a
+ store <64 x i32> %res, ptr %a
ret void
}
ret <2 x i64> %res
}
-define void @sdiv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
+define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
+ %op1 = load <4 x i64>, ptr %a
%res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 32, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)
- store <4 x i64> %res, <4 x i64>* %a
+ store <4 x i64> %res, ptr %a
ret void
}
-define void @sdiv_v8i64(<8 x i64>* %a) #0 {
+define void @sdiv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: sdiv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: asrd z0.d, p0/m, z0.d, #5
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
+ %op1 = load <8 x i64>, ptr %a
%res = sdiv <8 x i64> %op1, shufflevector (<8 x i64> insertelement (<8 x i64> poison, i64 32, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)
- store <8 x i64> %res, <8 x i64>* %a
+ store <8 x i64> %res, ptr %a
ret void
}
-define void @sdiv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
+define void @sdiv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
+ %op1 = load <16 x i64>, ptr %a
%res = sdiv <16 x i64> %op1, shufflevector (<16 x i64> insertelement (<16 x i64> poison, i64 32, i32 0), <16 x i64> poison, <16 x i32> zeroinitializer)
- store <16 x i64> %res, <16 x i64>* %a
+ store <16 x i64> %res, ptr %a
ret void
}
-define void @sdiv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
+define void @sdiv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
+ %op1 = load <32 x i64>, ptr %a
%res = sdiv <32 x i64> %op1, shufflevector (<32 x i64> insertelement (<32 x i64> poison, i64 32, i32 0), <32 x i64> poison, <32 x i32> zeroinitializer)
- store <32 x i64> %res, <32 x i64>* %a
+ store <32 x i64> %res, ptr %a
ret void
}
; bigger than NEON. However, having no support opens us up to a code generator
; hang when expanding BUILD_VECTOR. Here we just validate that the problematic
; case successfully exits code generation.
-define void @hang_when_merging_stores_after_legalisation(<8 x i32>* %a, <2 x i32> %b) vscale_range(2,2) #0 {
+define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) vscale_range(2,2) #0 {
; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: ret
%splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer
%interleaved.vec = shufflevector <8 x i32> %splat, <8 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
- store <8 x i32> %interleaved.vec, <8 x i32>* %a, align 4
+ store <8 x i32> %interleaved.vec, ptr %a, align 4
ret void
}
; Ensure we don't crash when trying to lower a shuffle via an extract
-define void @crash_when_lowering_extract_shuffle(<32 x i32>* %dst, i1 %cond) vscale_range(2,2) #0 {
+define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_range(2,2) #0 {
; CHECK-LABEL: crash_when_lowering_extract_shuffle:
; CHECK: // %bb.0:
; CHECK-NEXT: tbnz w1, #0, .LBB1_2
br i1 %cond, label %exit, label %vector.body
vector.body:
- %1 = load <32 x i32>, <32 x i32>* %dst, align 16
+ %1 = load <32 x i32>, ptr %dst, align 16
%predphi = select <32 x i1> %broadcast.splat, <32 x i32> zeroinitializer, <32 x i32> %1
- store <32 x i32> %predphi, <32 x i32>* %dst, align 16
+ store <32 x i32> %predphi, ptr %dst, align 16
br label %exit
exit:
ret <16 x i8> %splat
}
-define void @splat_v32i8(i8 %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @splat_v32i8(i8 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ret
%insert = insertelement <32 x i8> undef, i8 %a, i64 0
%splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
- store <32 x i8> %splat, <32 x i8>* %b
+ store <32 x i8> %splat, ptr %b
ret void
}
-define void @splat_v64i8(i8 %a, <64 x i8>* %b) #0 {
+define void @splat_v64i8(i8 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: ret
%insert = insertelement <64 x i8> undef, i8 %a, i64 0
%splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
- store <64 x i8> %splat, <64 x i8>* %b
+ store <64 x i8> %splat, ptr %b
ret void
}
-define void @splat_v128i8(i8 %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @splat_v128i8(i8 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: ret
%insert = insertelement <128 x i8> undef, i8 %a, i64 0
%splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer
- store <128 x i8> %splat, <128 x i8>* %b
+ store <128 x i8> %splat, ptr %b
ret void
}
-define void @splat_v256i8(i8 %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @splat_v256i8(i8 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: ret
%insert = insertelement <256 x i8> undef, i8 %a, i64 0
%splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer
- store <256 x i8> %splat, <256 x i8>* %b
+ store <256 x i8> %splat, ptr %b
ret void
}
ret <8 x i16> %splat
}
-define void @splat_v16i16(i16 %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @splat_v16i16(i16 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ret
%insert = insertelement <16 x i16> undef, i16 %a, i64 0
%splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
- store <16 x i16> %splat, <16 x i16>* %b
+ store <16 x i16> %splat, ptr %b
ret void
}
-define void @splat_v32i16(i16 %a, <32 x i16>* %b) #0 {
+define void @splat_v32i16(i16 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: ret
%insert = insertelement <32 x i16> undef, i16 %a, i64 0
%splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
- store <32 x i16> %splat, <32 x i16>* %b
+ store <32 x i16> %splat, ptr %b
ret void
}
-define void @splat_v64i16(i16 %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @splat_v64i16(i16 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ret
%insert = insertelement <64 x i16> undef, i16 %a, i64 0
%splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer
- store <64 x i16> %splat, <64 x i16>* %b
+ store <64 x i16> %splat, ptr %b
ret void
}
-define void @splat_v128i16(i16 %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @splat_v128i16(i16 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ret
%insert = insertelement <128 x i16> undef, i16 %a, i64 0
%splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer
- store <128 x i16> %splat, <128 x i16>* %b
+ store <128 x i16> %splat, ptr %b
ret void
}
ret <4 x i32> %splat
}
-define void @splat_v8i32(i32 %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @splat_v8i32(i32 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ret
%insert = insertelement <8 x i32> undef, i32 %a, i64 0
%splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
- store <8 x i32> %splat, <8 x i32>* %b
+ store <8 x i32> %splat, ptr %b
ret void
}
-define void @splat_v16i32(i32 %a, <16 x i32>* %b) #0 {
+define void @splat_v16i32(i32 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: ret
%insert = insertelement <16 x i32> undef, i32 %a, i64 0
%splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
- store <16 x i32> %splat, <16 x i32>* %b
+ store <16 x i32> %splat, ptr %b
ret void
}
-define void @splat_v32i32(i32 %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @splat_v32i32(i32 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ret
%insert = insertelement <32 x i32> undef, i32 %a, i64 0
%splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer
- store <32 x i32> %splat, <32 x i32>* %b
+ store <32 x i32> %splat, ptr %b
ret void
}
-define void @splat_v64i32(i32 %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @splat_v64i32(i32 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ret
%insert = insertelement <64 x i32> undef, i32 %a, i64 0
%splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer
- store <64 x i32> %splat, <64 x i32>* %b
+ store <64 x i32> %splat, ptr %b
ret void
}
ret <2 x i64> %splat
}
-define void @splat_v4i64(i64 %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @splat_v4i64(i64 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ret
%insert = insertelement <4 x i64> undef, i64 %a, i64 0
%splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
- store <4 x i64> %splat, <4 x i64>* %b
+ store <4 x i64> %splat, ptr %b
ret void
}
-define void @splat_v8i64(i64 %a, <8 x i64>* %b) #0 {
+define void @splat_v8i64(i64 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ret
%insert = insertelement <8 x i64> undef, i64 %a, i64 0
%splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
- store <8 x i64> %splat, <8 x i64>* %b
+ store <8 x i64> %splat, ptr %b
ret void
}
-define void @splat_v16i64(i64 %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @splat_v16i64(i64 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ret
%insert = insertelement <16 x i64> undef, i64 %a, i64 0
%splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer
- store <16 x i64> %splat, <16 x i64>* %b
+ store <16 x i64> %splat, ptr %b
ret void
}
-define void @splat_v32i64(i64 %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @splat_v32i64(i64 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ret
%insert = insertelement <32 x i64> undef, i64 %a, i64 0
%splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer
- store <32 x i64> %splat, <32 x i64>* %b
+ store <32 x i64> %splat, ptr %b
ret void
}
ret <8 x half> %splat
}
-define void @splat_v16f16(half %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @splat_v16f16(half %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%insert = insertelement <16 x half> undef, half %a, i64 0
%splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
- store <16 x half> %splat, <16 x half>* %b
+ store <16 x half> %splat, ptr %b
ret void
}
-define void @splat_v32f16(half %a, <32 x half>* %b) #0 {
+define void @splat_v32f16(half %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: ret
%insert = insertelement <32 x half> undef, half %a, i64 0
%splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
- store <32 x half> %splat, <32 x half>* %b
+ store <32 x half> %splat, ptr %b
ret void
}
-define void @splat_v64f16(half %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @splat_v64f16(half %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%insert = insertelement <64 x half> undef, half %a, i64 0
%splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer
- store <64 x half> %splat, <64 x half>* %b
+ store <64 x half> %splat, ptr %b
ret void
}
-define void @splat_v128f16(half %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @splat_v128f16(half %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%insert = insertelement <128 x half> undef, half %a, i64 0
%splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer
- store <128 x half> %splat, <128 x half>* %b
+ store <128 x half> %splat, ptr %b
ret void
}
ret <4 x float> %splat
}
-define void @splat_v8f32(float %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @splat_v8f32(float %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ret
%insert = insertelement <8 x float> undef, float %a, i64 0
%splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
- store <8 x float> %splat, <8 x float>* %b
+ store <8 x float> %splat, ptr %b
ret void
}
-define void @splat_v16f32(float %a, <16 x float>* %b) #0 {
+define void @splat_v16f32(float %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: ret
%insert = insertelement <16 x float> undef, float %a, i64 0
%splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
- store <16 x float> %splat, <16 x float>* %b
+ store <16 x float> %splat, ptr %b
ret void
}
-define void @splat_v32f32(float %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @splat_v32f32(float %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ret
%insert = insertelement <32 x float> undef, float %a, i64 0
%splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer
- store <32 x float> %splat, <32 x float>* %b
+ store <32 x float> %splat, ptr %b
ret void
}
-define void @splat_v64f32(float %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @splat_v64f32(float %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ret
%insert = insertelement <64 x float> undef, float %a, i64 0
%splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer
- store <64 x float> %splat, <64 x float>* %b
+ store <64 x float> %splat, ptr %b
ret void
}
ret <2 x double> %splat
}
-define void @splat_v4f64(double %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @splat_v4f64(double %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ret
%insert = insertelement <4 x double> undef, double %a, i64 0
%splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
- store <4 x double> %splat, <4 x double>* %b
+ store <4 x double> %splat, ptr %b
ret void
}
-define void @splat_v8f64(double %a, <8 x double>* %b) #0 {
+define void @splat_v8f64(double %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ret
%insert = insertelement <8 x double> undef, double %a, i64 0
%splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
- store <8 x double> %splat, <8 x double>* %b
+ store <8 x double> %splat, ptr %b
ret void
}
-define void @splat_v16f64(double %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @splat_v16f64(double %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ret
%insert = insertelement <16 x double> undef, double %a, i64 0
%splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer
- store <16 x double> %splat, <16 x double>* %b
+ store <16 x double> %splat, ptr %b
ret void
}
-define void @splat_v32f64(double %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @splat_v32f64(double %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ret
%insert = insertelement <32 x double> undef, double %a, i64 0
%splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer
- store <32 x double> %splat, <32 x double>* %b
+ store <32 x double> %splat, ptr %b
ret void
}
; DUP (integer immediate)
;
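; An immediate splat needs no GPR: DUP's immediate form puts the constant
; straight into the vector register ('mov z0.b, #1' and friends below).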
-define void @splat_imm_v64i8(<64 x i8>* %a) vscale_range(4,0) #0 {
+define void @splat_imm_v64i8(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, #1 // =0x1
; CHECK-NEXT: ret
%insert = insertelement <64 x i8> undef, i8 1, i64 0
%splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
- store <64 x i8> %splat, <64 x i8>* %a
+ store <64 x i8> %splat, ptr %a
ret void
}
-define void @splat_imm_v32i16(<32 x i16>* %a) vscale_range(4,0) #0 {
+define void @splat_imm_v32i16(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, #2 // =0x2
; CHECK-NEXT: ret
%insert = insertelement <32 x i16> undef, i16 2, i64 0
%splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
- store <32 x i16> %splat, <32 x i16>* %a
+ store <32 x i16> %splat, ptr %a
ret void
}
-define void @splat_imm_v16i32(<16 x i32>* %a) vscale_range(4,0) #0 {
+define void @splat_imm_v16i32(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, #3 // =0x3
; CHECK-NEXT: ret
%insert = insertelement <16 x i32> undef, i32 3, i64 0
%splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
- store <16 x i32> %splat, <16 x i32>* %a
+ store <16 x i32> %splat, ptr %a
ret void
}
-define void @splat_imm_v8i64(<8 x i64>* %a) vscale_range(4,0) #0 {
+define void @splat_imm_v8i64(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, #4 // =0x4
; CHECK-NEXT: ret
%insert = insertelement <8 x i64> undef, i64 4, i64 0
%splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
- store <8 x i64> %splat, <8 x i64>* %a
+ store <8 x i64> %splat, ptr %a
ret void
}
; DUP (floating-point immediate)
;
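; Likewise for floating-point: constants that fit FDUP's immediate encoding
; are materialised with an 'fmov' immediate ('fmov z0.h, #5.0' below).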
-define void @splat_imm_v32f16(<32 x half>* %a) vscale_range(4,0) #0 {
+define void @splat_imm_v32f16(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov z0.h, #5.00000000
; CHECK-NEXT: ret
%insert = insertelement <32 x half> undef, half 5.0, i64 0
%splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
- store <32 x half> %splat, <32 x half>* %a
+ store <32 x half> %splat, ptr %a
ret void
}
-define void @splat_imm_v16f32(<16 x float>* %a) vscale_range(4,0) #0 {
+define void @splat_imm_v16f32(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov z0.s, #6.00000000
; CHECK-NEXT: ret
%insert = insertelement <16 x float> undef, float 6.0, i64 0
%splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
- store <16 x float> %splat, <16 x float>* %a
+ store <16 x float> %splat, ptr %a
ret void
}
-define void @splat_imm_v8f64(<8 x double>* %a) vscale_range(4,0) #0 {
+define void @splat_imm_v8f64(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov z0.d, #7.00000000
; CHECK-NEXT: ret
%insert = insertelement <8 x double> undef, double 7.0, i64 0
%splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
- store <8 x double> %splat, <8 x double>* %a
+ store <8 x double> %splat, ptr %a
ret void
}
target triple = "aarch64-unknown-linux-gnu"
; Don't use SVE for 64-bit vectors.
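; A 64-bit zero store is a single 'str xzr'; the 128-bit case below is a
; single 'stp xzr, xzr'. Neither needs a vector register at all.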
-define void @store_v2f32(<2 x float>* %a) #0 {
+define void @store_v2f32(ptr %a) #0 {
; CHECK-LABEL: store_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: str xzr, [x0]
; CHECK-NEXT: ret
- store <2 x float> zeroinitializer, <2 x float>* %a
+ store <2 x float> zeroinitializer, ptr %a
ret void
}
; Don't use SVE for 128-bit vectors.
-define void @store_v4f32(<4 x float>* %a) #0 {
+define void @store_v4f32(ptr %a) #0 {
; CHECK-LABEL: store_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp xzr, xzr, [x0]
; CHECK-NEXT: ret
- store <4 x float> zeroinitializer, <4 x float>* %a
+ store <4 x float> zeroinitializer, ptr %a
ret void
}
-define void @store_v8f32(<8 x float>* %a) #0 {
+define void @store_v8f32(ptr %a) #0 {
; CHECK-LABEL: store_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
- store <8 x float> zeroinitializer, <8 x float>* %a
+ store <8 x float> zeroinitializer, ptr %a
ret void
}
-define void @store_v16f32(<16 x float>* %a) #0 {
+define void @store_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: store_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_2048-NEXT: mov z0.s, #0 // =0x0
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_2048-NEXT: ret
- store <16 x float> zeroinitializer, <16 x float>* %a
+ store <16 x float> zeroinitializer, ptr %a
ret void
}
-define void @store_v32f32(<32 x float>* %a) #0 {
+define void @store_v32f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: store_v32f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #24
; VBITS_GE_2048-NEXT: mov z0.s, #0 // =0x0
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_2048-NEXT: ret
- store <32 x float> zeroinitializer, <32 x float>* %a
+ store <32 x float> zeroinitializer, ptr %a
ret void
}
-define void @store_v64f32(<64 x float>* %a) #0 {
+define void @store_v64f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: store_v64f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #56
; VBITS_GE_2048-NEXT: mov z0.s, #0 // =0x0
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_2048-NEXT: ret
- store <64 x float> zeroinitializer, <64 x float>* %a
+ store <64 x float> zeroinitializer, ptr %a
ret void
}
target triple = "aarch64-unknown-linux-gnu"
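; In each test the branch keeps the loaded vector live across a block
; boundary; for types wider than a NEON register this exercises passing the
; value around as legalised subvectors.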
-define void @subvector_v8i16(<8 x i16> *%in, <8 x i16>* %out) vscale_range(2,0) #0 {
+define void @subvector_v8i16(ptr %in, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: subvector_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %a = load <8 x i16>, <8 x i16>* %in
+ %a = load <8 x i16>, ptr %in
br label %bb1
bb1:
- store <8 x i16> %a, <8 x i16>* %out
+ store <8 x i16> %a, ptr %out
ret void
}
-define void @subvector_v16i16(<16 x i16> *%in, <16 x i16>* %out) vscale_range(2,0) #0 {
+define void @subvector_v16i16(ptr %in, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: subvector_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x i16>, <16 x i16>* %in
+ %a = load <16 x i16>, ptr %in
br label %bb1
bb1:
- store <16 x i16> %a, <16 x i16>* %out
+ store <16 x i16> %a, ptr %out
ret void
}
-define void @subvector_v32i16(<32 x i16> *%in, <32 x i16>* %out) #0 {
+define void @subvector_v32i16(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: subvector_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %in
+ %a = load <32 x i16>, ptr %in
br label %bb1
bb1:
- store <32 x i16> %a, <32 x i16>* %out
+ store <32 x i16> %a, ptr %out
ret void
}
-define void @subvector_v64i16(<64 x i16> *%in, <64 x i16>* %out) vscale_range(8,0) #0 {
+define void @subvector_v64i16(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: subvector_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i16>, <64 x i16>* %in
+ %a = load <64 x i16>, ptr %in
br label %bb1
bb1:
- store <64 x i16> %a, <64 x i16>* %out
+ store <64 x i16> %a, ptr %out
ret void
}
-define void @subvector_v8i32(<8 x i32> *%in, <8 x i32>* %out) vscale_range(2,0) #0 {
+define void @subvector_v8i32(ptr %in, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: subvector_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <8 x i32>, <8 x i32>* %in
+ %a = load <8 x i32>, ptr %in
br label %bb1
bb1:
- store <8 x i32> %a, <8 x i32>* %out
+ store <8 x i32> %a, ptr %out
ret void
}
-define void @subvector_v16i32(<16 x i32> *%in, <16 x i32>* %out) #0 {
+define void @subvector_v16i32(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: subvector_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %in
+ %a = load <16 x i32>, ptr %in
br label %bb1
bb1:
- store <16 x i32> %a, <16 x i32>* %out
+ store <16 x i32> %a, ptr %out
ret void
}
-define void @subvector_v32i32(<32 x i32> *%in, <32 x i32>* %out) vscale_range(8,0) #0 {
+define void @subvector_v32i32(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: subvector_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i32>, <32 x i32>* %in
+ %a = load <32 x i32>, ptr %in
br label %bb1
bb1:
- store <32 x i32> %a, <32 x i32>* %out
+ store <32 x i32> %a, ptr %out
ret void
}
-define void @subvector_v64i32(<64 x i32> *%in, <64 x i32>* %out) vscale_range(16,0) #0 {
+define void @subvector_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: subvector_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i32>, <64 x i32>* %in
+ %a = load <64 x i32>, ptr %in
br label %bb1
bb1:
- store <64 x i32> %a, <64 x i32>* %out
+ store <64 x i32> %a, ptr %out
ret void
}
-define void @subvector_v8i64(<8 x i64> *%in, <8 x i64>* %out) vscale_range(2,0) #0 {
+define void @subvector_v8i64(ptr %in, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: subvector_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4
; CHECK-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
; CHECK-NEXT: st1d { z1.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %in
+ %a = load <8 x i64>, ptr %in
br label %bb1
bb1:
- store <8 x i64> %a, <8 x i64>* %out
+ store <8 x i64> %a, ptr %out
ret void
}
-define void @subvector_v16i64(<16 x i64> *%in, <16 x i64>* %out) vscale_range(8,0) #0 {
+define void @subvector_v16i64(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: subvector_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x i64>, <16 x i64>* %in
+ %a = load <16 x i64>, ptr %in
br label %bb1
bb1:
- store <16 x i64> %a, <16 x i64>* %out
+ store <16 x i64> %a, ptr %out
ret void
}
-define void @subvector_v32i64(<32 x i64> *%in, <32 x i64>* %out) vscale_range(16,0) #0 {
+define void @subvector_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: subvector_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i64>, <32 x i64>* %in
+ %a = load <32 x i64>, ptr %in
br label %bb1
bb1:
- store <32 x i64> %a, <32 x i64>* %out
+ store <32 x i64> %a, ptr %out
ret void
}
-define void @subvector_v8f16(<8 x half> *%in, <8 x half>* %out) vscale_range(2,0) #0 {
+define void @subvector_v8f16(ptr %in, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: subvector_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
- %a = load <8 x half>, <8 x half>* %in
+ %a = load <8 x half>, ptr %in
br label %bb1
bb1:
- store <8 x half> %a, <8 x half>* %out
+ store <8 x half> %a, ptr %out
ret void
}
-define void @subvector_v16f16(<16 x half> *%in, <16 x half>* %out) vscale_range(2,0) #0 {
+define void @subvector_v16f16(ptr %in, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: subvector_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x half>, <16 x half>* %in
+ %a = load <16 x half>, ptr %in
br label %bb1
bb1:
- store <16 x half> %a, <16 x half>* %out
+ store <16 x half> %a, ptr %out
ret void
}
-define void @subvector_v32f16(<32 x half> *%in, <32 x half>* %out) #0 {
+define void @subvector_v32f16(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: subvector_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <32 x half>, <32 x half>* %in
+ %a = load <32 x half>, ptr %in
br label %bb1
bb1:
- store <32 x half> %a, <32 x half>* %out
+ store <32 x half> %a, ptr %out
ret void
}
-define void @subvector_v64f16(<64 x half> *%in, <64 x half>* %out) vscale_range(8,0) #0 {
+define void @subvector_v64f16(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: subvector_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x half>, <64 x half>* %in
+ %a = load <64 x half>, ptr %in
br label %bb1
bb1:
- store <64 x half> %a, <64 x half>* %out
+ store <64 x half> %a, ptr %out
ret void
}
-define void @subvector_v8f32(<8 x float> *%in, <8 x float>* %out) vscale_range(2,0) #0 {
+define void @subvector_v8f32(ptr %in, ptr %out) vscale_range(2,0) #0 {
; CHECK-LABEL: subvector_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <8 x float>, <8 x float>* %in
+ %a = load <8 x float>, ptr %in
br label %bb1
bb1:
- store <8 x float> %a, <8 x float>* %out
+ store <8 x float> %a, ptr %out
ret void
}
-define void @subvector_v16f32(<16 x float> *%in, <16 x float>* %out) #0 {
+define void @subvector_v16f32(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: subvector_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x float>, <16 x float>* %in
+ %a = load <16 x float>, ptr %in
br label %bb1
bb1:
- store <16 x float> %a, <16 x float>* %out
+ store <16 x float> %a, ptr %out
ret void
}
-define void @subvector_v32f32(<32 x float> *%in, <32 x float>* %out) vscale_range(8,0) #0 {
+define void @subvector_v32f32(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: subvector_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x float>, <32 x float>* %in
+ %a = load <32 x float>, ptr %in
br label %bb1
bb1:
- store <32 x float> %a, <32 x float>* %out
+ store <32 x float> %a, ptr %out
ret void
}
-define void @subvector_v64f32(<64 x float> *%in, <64 x float>* %out) vscale_range(16,0) #0 {
+define void @subvector_v64f32(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: subvector_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x float>, <64 x float>* %in
+ %a = load <64 x float>, ptr %in
br label %bb1
bb1:
- store <64 x float> %a, <64 x float>* %out
+ store <64 x float> %a, ptr %out
ret void
}
-define void @subvector_v8f64(<8 x double> *%in, <8 x double>* %out) #0 {
+define void @subvector_v8f64(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: subvector_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x double>, <8 x double>* %in
+ %a = load <8 x double>, ptr %in
br label %bb1
bb1:
- store <8 x double> %a, <8 x double>* %out
+ store <8 x double> %a, ptr %out
ret void
}
-define void @subvector_v16f64(<16 x double> *%in, <16 x double>* %out) vscale_range(8,0) #0 {
+define void @subvector_v16f64(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: subvector_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x double>, <16 x double>* %in
+ %a = load <16 x double>, ptr %in
br label %bb1
bb1:
- store <16 x double> %a, <16 x double>* %out
+ store <16 x double> %a, ptr %out
ret void
}
-define void @subvector_v32f64(<32 x double> *%in, <32 x double>* %out) vscale_range(16,0) #0 {
+define void @subvector_v32f64(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: subvector_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x double>, <32 x double>* %in
+ %a = load <32 x double>, ptr %in
br label %bb1
bb1:
- store <32 x double> %a, <32 x double>* %out
+ store <32 x double> %a, ptr %out
ret void
}
-define <8 x i1> @no_warn_dropped_scalable(<8 x i32>* %in) #0 {
+define <8 x i1> @no_warn_dropped_scalable(ptr %in) #0 {
; CHECK-LABEL: no_warn_dropped_scalable:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %a = load <8 x i32>, <8 x i32>* %in
+ %a = load <8 x i32>, ptr %in
br label %bb1
bb1:
; combines remove redundant subvector operations. This test ensures it's not
; performed when the input idiom is the result of operation legalisation. When
; not prevented, the test triggers infinite combine->legalise->combine->...
-define void @no_subvector_binop_hang(<8 x i32>* %in, <8 x i32>* %out, i1 %cond) #0 {
+define void @no_subvector_binop_hang(ptr %in, ptr %out, i1 %cond) #0 {
; CHECK-LABEL: no_subvector_binop_hang:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: .LBB23_2: // %bb.2
; CHECK-NEXT: ret
- %a = load <8 x i32>, <8 x i32>* %in
- %b = load <8 x i32>, <8 x i32>* %out
+ %a = load <8 x i32>, ptr %in
+ %b = load <8 x i32>, ptr %out
br i1 %cond, label %bb.1, label %bb.2
bb.1:
%or = or <8 x i32> %a, %b
- store <8 x i32> %or, <8 x i32>* %out
+ store <8 x i32> %or, ptr %out
br label %bb.2
bb.2:
target triple = "aarch64-unknown-linux-gnu"
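; SVE stores whose store size is narrower than the element size (e.g.
; 'st1b { z0.d }') are truncating stores: only the low byte/halfword/word of
; each element is written, so no separate trunc instructions are needed.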
-define void @store_trunc_v2i64i8(<2 x i64>* %ap, <2 x i8>* %dest) vscale_range(2,0) #0 {
+define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) vscale_range(2,0) #0 {
; CHECK-LABEL: store_trunc_v2i64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <2 x i64>, <2 x i64>* %ap
+ %a = load <2 x i64>, ptr %ap
%val = trunc <2 x i64> %a to <2 x i8>
- store <2 x i8> %val, <2 x i8>* %dest
+ store <2 x i8> %val, ptr %dest
ret void
}
-define void @store_trunc_v4i64i8(<4 x i64>* %ap, <4 x i8>* %dest) vscale_range(2,0) #0 {
+define void @store_trunc_v4i64i8(ptr %ap, ptr %dest) vscale_range(2,0) #0 {
; CHECK-LABEL: store_trunc_v4i64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <4 x i64>, <4 x i64>* %ap
+ %a = load <4 x i64>, ptr %ap
%val = trunc <4 x i64> %a to <4 x i8>
- store <4 x i8> %val, <4 x i8>* %dest
+ store <4 x i8> %val, ptr %dest
ret void
}
-define void @store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i8>* %dest) #0 {
+define void @store_trunc_v8i64i8(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v8i64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %ap
+ %a = load <8 x i64>, ptr %ap
%val = trunc <8 x i64> %a to <8 x i8>
- store <8 x i8> %val, <8 x i8>* %dest
+ store <8 x i8> %val, ptr %dest
ret void
}
-define void @store_trunc_v16i64i8(<16 x i64>* %ap, <16 x i8>* %dest) vscale_range(8,0) #0 {
+define void @store_trunc_v16i64i8(ptr %ap, ptr %dest) vscale_range(8,0) #0 {
; CHECK-LABEL: store_trunc_v16i64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x i64>, <16 x i64>* %ap
+ %a = load <16 x i64>, ptr %ap
%val = trunc <16 x i64> %a to <16 x i8>
- store <16 x i8> %val, <16 x i8>* %dest
+ store <16 x i8> %val, ptr %dest
ret void
}
-define void @store_trunc_v32i64i8(<32 x i64>* %ap, <32 x i8>* %dest) vscale_range(16,0) #0 {
+define void @store_trunc_v32i64i8(ptr %ap, ptr %dest) vscale_range(16,0) #0 {
; CHECK-LABEL: store_trunc_v32i64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i64>, <32 x i64>* %ap
+ %a = load <32 x i64>, ptr %ap
%val = trunc <32 x i64> %a to <32 x i8>
- store <32 x i8> %val, <32 x i8>* %dest
+ store <32 x i8> %val, ptr %dest
ret void
}
-define void @store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i16>* %dest) #0 {
+define void @store_trunc_v8i64i16(ptr %ap, ptr %dest) #0 {
; Currently does not use the truncating store
; VBITS_GE_256-LABEL: store_trunc_v8i64i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %ap
+ %a = load <8 x i64>, ptr %ap
%val = trunc <8 x i64> %a to <8 x i16>
- store <8 x i16> %val, <8 x i16>* %dest
+ store <8 x i16> %val, ptr %dest
ret void
}
-define void @store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i32>* %dest) #0 {
+define void @store_trunc_v8i64i32(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v8i64i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %ap
+ %a = load <8 x i64>, ptr %ap
%val = trunc <8 x i64> %a to <8 x i32>
- store <8 x i32> %val, <8 x i32>* %dest
+ store <8 x i32> %val, ptr %dest
ret void
}
-define void @store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i8>* %dest) #0 {
+define void @store_trunc_v16i32i8(ptr %ap, ptr %dest) #0 {
; Currently does not use the truncating store
; VBITS_GE_256-LABEL: store_trunc_v16i32i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1b { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %ap
+ %a = load <16 x i32>, ptr %ap
%val = trunc <16 x i32> %a to <16 x i8>
- store <16 x i8> %val, <16 x i8>* %dest
+ store <16 x i8> %val, ptr %dest
ret void
}
-define void @store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i16>* %dest) #0 {
+define void @store_trunc_v16i32i16(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v16i32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %ap
+ %a = load <16 x i32>, ptr %ap
%val = trunc <16 x i32> %a to <16 x i16>
- store <16 x i16> %val, <16 x i16>* %dest
+ store <16 x i16> %val, ptr %dest
ret void
}
-define void @store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i8>* %dest) #0 {
+define void @store_trunc_v32i16i8(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v32i16i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1b { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %ap
+ %a = load <32 x i16>, ptr %ap
%val = trunc <32 x i16> %a to <32 x i8>
- store <32 x i8> %val, <32 x i8>* %dest
+ store <32 x i8> %val, ptr %dest
ret void
}
; truncate i16 -> i8
;
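; Truncates that stay in registers lower to uzp1, which concatenates the
; even-numbered (i.e. low-half) sub-elements of its two sources.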
-define <16 x i8> @trunc_v16i16_v16i8(<16 x i16>* %in) vscale_range(2,0) #0 {
+define <16 x i8> @trunc_v16i16_v16i8(ptr %in) vscale_range(2,0) #0 {
; CHECK-LABEL: trunc_v16i16_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %a = load <16 x i16>, <16 x i16>* %in
+ %a = load <16 x i16>, ptr %in
%b = trunc <16 x i16> %a to <16 x i8>
ret <16 x i8> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v32i16_v32i8(<32 x i16>* %in, <32 x i8>* %out) #0 {
+define void @trunc_v32i16_v32i8(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: trunc_v32i16_v32i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: add z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <32 x i16>, <32 x i16>* %in
+ %a = load <32 x i16>, ptr %in
%b = trunc <32 x i16> %a to <32 x i8>
%c = add <32 x i8> %b, %b
- store <32 x i8> %c, <32 x i8>* %out
+ store <32 x i8> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v64i16_v64i8(<64 x i16>* %in, <64 x i8>* %out) vscale_range(8,0) #0 {
+define void @trunc_v64i16_v64i8(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: trunc_v64i16_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i16>, <64 x i16>* %in
+ %a = load <64 x i16>, ptr %in
%b = trunc <64 x i16> %a to <64 x i8>
%c = add <64 x i8> %b, %b
- store <64 x i8> %c, <64 x i8>* %out
+ store <64 x i8> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v128i16_v128i8(<128 x i16>* %in, <128 x i8>* %out) vscale_range(16,0) #0 {
+define void @trunc_v128i16_v128i8(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: trunc_v128i16_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <128 x i16>, <128 x i16>* %in
+ %a = load <128 x i16>, ptr %in
%b = trunc <128 x i16> %a to <128 x i8>
%c = add <128 x i8> %b, %b
- store <128 x i8> %c, <128 x i8>* %out
+ store <128 x i8> %c, ptr %out
ret void
}
; truncate i32 -> i8
;
-define <8 x i8> @trunc_v8i32_v8i8(<8 x i32>* %in) vscale_range(2,0) #0 {
+define <8 x i8> @trunc_v8i32_v8i8(ptr %in) vscale_range(2,0) #0 {
; CHECK-LABEL: trunc_v8i32_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %a = load <8 x i32>, <8 x i32>* %in
+ %a = load <8 x i32>, ptr %in
%b = trunc <8 x i32> %a to <8 x i8>
ret <8 x i8> %b
}
-define <16 x i8> @trunc_v16i32_v16i8(<16 x i32>* %in) #0 {
+define <16 x i8> @trunc_v16i32_v16i8(ptr %in) #0 {
; VBITS_GE_256-LABEL: trunc_v16i32_v16i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %in
+ %a = load <16 x i32>, ptr %in
%b = trunc <16 x i32> %a to <16 x i8>
ret <16 x i8> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v32i32_v32i8(<32 x i32>* %in, <32 x i8>* %out) vscale_range(8,0) #0 {
+define void @trunc_v32i32_v32i8(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: trunc_v32i32_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i32>, <32 x i32>* %in
+ %a = load <32 x i32>, ptr %in
%b = trunc <32 x i32> %a to <32 x i8>
%c = add <32 x i8> %b, %b
- store <32 x i8> %c, <32 x i8>* %out
+ store <32 x i8> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v64i32_v64i8(<64 x i32>* %in, <64 x i8>* %out) vscale_range(16,0) #0 {
+define void @trunc_v64i32_v64i8(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: trunc_v64i32_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i32>, <64 x i32>* %in
+ %a = load <64 x i32>, ptr %in
%b = trunc <64 x i32> %a to <64 x i8>
%c = add <64 x i8> %b, %b
- store <64 x i8> %c, <64 x i8>* %out
+ store <64 x i8> %c, ptr %out
ret void
}
; truncate i32 -> i16
;
-define <8 x i16> @trunc_v8i32_v8i16(<8 x i32>* %in) vscale_range(2,0) #0 {
+define <8 x i16> @trunc_v8i32_v8i16(ptr %in) vscale_range(2,0) #0 {
; CHECK-LABEL: trunc_v8i32_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %a = load <8 x i32>, <8 x i32>* %in
+ %a = load <8 x i32>, ptr %in
%b = trunc <8 x i32> %a to <8 x i16>
ret <8 x i16> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v16i32_v16i16(<16 x i32>* %in, <16 x i16>* %out) #0 {
+define void @trunc_v16i32_v16i16(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: trunc_v16i32_v16i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: add z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <16 x i32>, <16 x i32>* %in
+ %a = load <16 x i32>, ptr %in
%b = trunc <16 x i32> %a to <16 x i16>
%c = add <16 x i16> %b, %b
- store <16 x i16> %c, <16 x i16>* %out
+ store <16 x i16> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v32i32_v32i16(<32 x i32>* %in, <32 x i16>* %out) vscale_range(8,0) #0 {
+define void @trunc_v32i32_v32i16(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: trunc_v32i32_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i32>, <32 x i32>* %in
+ %a = load <32 x i32>, ptr %in
%b = trunc <32 x i32> %a to <32 x i16>
%c = add <32 x i16> %b, %b
- store <32 x i16> %c, <32 x i16>* %out
+ store <32 x i16> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v64i32_v64i16(<64 x i32>* %in, <64 x i16>* %out) vscale_range(16,0) #0 {
+define void @trunc_v64i32_v64i16(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: trunc_v64i32_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <64 x i32>, <64 x i32>* %in
+ %a = load <64 x i32>, ptr %in
%b = trunc <64 x i32> %a to <64 x i16>
%c = add <64 x i16> %b, %b
- store <64 x i16> %c, <64 x i16>* %out
+ store <64 x i16> %c, ptr %out
ret void
}
;
; NOTE: v4i8 is not legal so result i8 elements are held within i16 containers.
-define <4 x i8> @trunc_v4i64_v4i8(<4 x i64>* %in) vscale_range(2,0) #0 {
+define <4 x i8> @trunc_v4i64_v4i8(ptr %in) vscale_range(2,0) #0 {
; CHECK-LABEL: trunc_v4i64_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %a = load <4 x i64>, <4 x i64>* %in
+ %a = load <4 x i64>, ptr %in
%b = trunc <4 x i64> %a to <4 x i8>
ret <4 x i8> %b
}
-define <8 x i8> @trunc_v8i64_v8i8(<8 x i64>* %in) #0 {
+define <8 x i8> @trunc_v8i64_v8i8(ptr %in) #0 {
; VBITS_GE_256-LABEL: trunc_v8i64_v8i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %in
+ %a = load <8 x i64>, ptr %in
%b = trunc <8 x i64> %a to <8 x i8>
ret <8 x i8> %b
}
-define <16 x i8> @trunc_v16i64_v16i8(<16 x i64>* %in) vscale_range(8,0) #0 {
+define <16 x i8> @trunc_v16i64_v16i8(ptr %in) vscale_range(8,0) #0 {
; CHECK-LABEL: trunc_v16i64_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %a = load <16 x i64>, <16 x i64>* %in
+ %a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i8>
ret <16 x i8> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v32i64_v32i8(<32 x i64>* %in, <32 x i8>* %out) vscale_range(16,0) #0 {
+define void @trunc_v32i64_v32i8(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: trunc_v32i64_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i64>, <32 x i64>* %in
+ %a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i8>
%c = add <32 x i8> %b, %b
- store <32 x i8> %c, <32 x i8>* %out
+ store <32 x i8> %c, ptr %out
ret void
}
; truncate i64 -> i16
;
-define <4 x i16> @trunc_v4i64_v4i16(<4 x i64>* %in) vscale_range(2,0) #0 {
+define <4 x i16> @trunc_v4i64_v4i16(ptr %in) vscale_range(2,0) #0 {
; CHECK-LABEL: trunc_v4i64_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
- %a = load <4 x i64>, <4 x i64>* %in
+ %a = load <4 x i64>, ptr %in
%b = trunc <4 x i64> %a to <4 x i16>
ret <4 x i16> %b
}
-define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %in) #0 {
+define <8 x i16> @trunc_v8i64_v8i16(ptr %in) #0 {
; VBITS_GE_256-LABEL: trunc_v8i64_v8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %in
+ %a = load <8 x i64>, ptr %in
%b = trunc <8 x i64> %a to <8 x i16>
ret <8 x i16> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v16i64_v16i16(<16 x i64>* %in, <16 x i16>* %out) vscale_range(8,0) #0 {
+define void @trunc_v16i64_v16i16(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: trunc_v16i64_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x i64>, <16 x i64>* %in
+ %a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i16>
%c = add <16 x i16> %b, %b
- store <16 x i16> %c, <16 x i16>* %out
+ store <16 x i16> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v32i64_v32i16(<32 x i64>* %in, <32 x i16>* %out) vscale_range(16,0) #0 {
+define void @trunc_v32i64_v32i16(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: trunc_v32i64_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i64>, <32 x i64>* %in
+ %a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i16>
%c = add <32 x i16> %b, %b
- store <32 x i16> %c, <32 x i16>* %out
+ store <32 x i16> %c, ptr %out
ret void
}
; truncate i64 -> i32
;
-define <4 x i32> @trunc_v4i64_v4i32(<4 x i64>* %in) vscale_range(2,0) #0 {
+define <4 x i32> @trunc_v4i64_v4i32(ptr %in) vscale_range(2,0) #0 {
; CHECK-LABEL: trunc_v4i64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
- %a = load <4 x i64>, <4 x i64>* %in
+ %a = load <4 x i64>, ptr %in
%b = trunc <4 x i64> %a to <4 x i32>
ret <4 x i32> %b
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v8i64_v8i32(<8 x i64>* %in, <8 x i32>* %out) #0 {
+define void @trunc_v8i64_v8i32(ptr %in, ptr %out) #0 {
; VBITS_GE_256-LABEL: trunc_v8i64_v8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: add z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
- %a = load <8 x i64>, <8 x i64>* %in
+ %a = load <8 x i64>, ptr %in
%b = trunc <8 x i64> %a to <8 x i32>
%c = add <8 x i32> %b, %b
- store <8 x i32> %c, <8 x i32>* %out
+ store <8 x i32> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v16i64_v16i32(<16 x i64>* %in, <16 x i32>* %out) vscale_range(8,0) #0 {
+define void @trunc_v16i64_v16i32(ptr %in, ptr %out) vscale_range(8,0) #0 {
; CHECK-LABEL: trunc_v16i64_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: add z0.s, z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <16 x i64>, <16 x i64>* %in
+ %a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i32>
%c = add <16 x i32> %b, %b
- store <16 x i32> %c, <16 x i32>* %out
+ store <16 x i32> %c, ptr %out
ret void
}
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
-define void @trunc_v32i64_v32i32(<32 x i64>* %in, <32 x i32>* %out) vscale_range(16,0) #0 {
+define void @trunc_v32i64_v32i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
; CHECK-LABEL: trunc_v32i64_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: add z0.s, z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
- %a = load <32 x i64>, <32 x i64>* %in
+ %a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i32>
%c = add <32 x i32> %b, %b
- store <32 x i32> %c, <32 x i32>* %out
+ store <32 x i32> %c, ptr %out
ret void
}
ret <16 x i8> %ret
}
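; Each mask below selects the last element of %op1 followed by all but the
; last element of %op2, which maps onto SVE's insr: shift the vector up one
; lane and insert a scalar at the front.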
-define void @shuffle_ext_byone_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
+define void @shuffle_ext_byone_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: insr z1.b, w8
; CHECK-NEXT: st1b { z1.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i8>, <32 x i8>* %a
- %op2 = load <32 x i8>, <32 x i8>* %b
+ %op1 = load <32 x i8>, ptr %a
+ %op2 = load <32 x i8>, ptr %b
%ret = shufflevector <32 x i8> %op1, <32 x i8> %op2, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,
i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46,
i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54,
i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
- store <32 x i8> %ret, <32 x i8>* %a
+ store <32 x i8> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
+define void @shuffle_ext_byone_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shuffle_ext_byone_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
; VBITS_GE_512-NEXT: insr z1.b, w8
; VBITS_GE_512-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <64 x i8>, <64 x i8>* %a
- %op2 = load <64 x i8>, <64 x i8>* %b
+ %op1 = load <64 x i8>, ptr %a
+ %op2 = load <64 x i8>, ptr %b
%ret = shufflevector <64 x i8> %op1, <64 x i8> %op2, <64 x i32> <i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70,
i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78,
i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86,
i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110,
i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118,
i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126>
- store <64 x i8> %ret, <64 x i8>* %a
+ store <64 x i8> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
+define void @shuffle_ext_byone_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: insr z1.b, w8
; CHECK-NEXT: st1b { z1.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i8>, <128 x i8>* %a
- %op2 = load <128 x i8>, <128 x i8>* %b
+ %op1 = load <128 x i8>, ptr %a
+ %op2 = load <128 x i8>, ptr %b
%ret = shufflevector <128 x i8> %op1, <128 x i8> %op2, <128 x i32> <i32 127, i32 128, i32 129, i32 130, i32 131, i32 132, i32 133, i32 134,
i32 135, i32 136, i32 137, i32 138, i32 139, i32 140, i32 141, i32 142,
i32 143, i32 144, i32 145, i32 146, i32 147, i32 148, i32 149, i32 150,
i32 231, i32 232, i32 233, i32 234, i32 235, i32 236, i32 237, i32 238,
i32 239, i32 240, i32 241, i32 242, i32 243, i32 244, i32 245, i32 246,
i32 247, i32 248, i32 249, i32 250, i32 251, i32 252, i32 253, i32 254>
- store <128 x i8> %ret, <128 x i8>* %a
+ store <128 x i8> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
+define void @shuffle_ext_byone_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: insr z1.b, w8
; CHECK-NEXT: st1b { z1.b }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <256 x i8>, <256 x i8>* %a
- %op2 = load <256 x i8>, <256 x i8>* %b
+ %op1 = load <256 x i8>, ptr %a
+ %op2 = load <256 x i8>, ptr %b
%ret = shufflevector <256 x i8> %op1, <256 x i8> %op2, <256 x i32> <i32 255, i32 256, i32 257, i32 258, i32 259, i32 260, i32 261, i32 262,
i32 263, i32 264, i32 265, i32 266, i32 267, i32 268, i32 269, i32 270,
i32 271, i32 272, i32 273, i32 274, i32 275, i32 276, i32 277, i32 278,
i32 487, i32 488, i32 489, i32 490, i32 491, i32 492, i32 493, i32 494,
i32 495, i32 496, i32 497, i32 498, i32 499, i32 500, i32 501, i32 502,
i32 503, i32 504, i32 505, i32 506, i32 507, i32 508, i32 509, i32 510>
- store <256 x i8> %ret, <256 x i8>* %a
+ store <256 x i8> %ret, ptr %a
ret void
}
ret <8 x i16> %ret
}
-define void @shuffle_ext_byone_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
+define void @shuffle_ext_byone_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: insr z1.h, w8
; CHECK-NEXT: st1h { z1.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i16>, <16 x i16>* %a
- %op2 = load <16 x i16>, <16 x i16>* %b
+ %op1 = load <16 x i16>, ptr %a
+ %op2 = load <16 x i16>, ptr %b
%ret = shufflevector <16 x i16> %op1, <16 x i16> %op2, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22,
i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
- store <16 x i16> %ret, <16 x i16>* %a
+ store <16 x i16> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
+define void @shuffle_ext_byone_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shuffle_ext_byone_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: insr z1.h, w8
; VBITS_GE_512-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x i16>, <32 x i16>* %a
- %op2 = load <32 x i16>, <32 x i16>* %b
+ %op1 = load <32 x i16>, ptr %a
+ %op2 = load <32 x i16>, ptr %b
%ret = shufflevector <32 x i16> %op1, <32 x i16> %op2, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,
i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46,
i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54,
i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
- store <32 x i16> %ret, <32 x i16>* %a
+ store <32 x i16> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
+define void @shuffle_ext_byone_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: insr z1.h, w8
; CHECK-NEXT: st1h { z1.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i16>, <64 x i16>* %a
- %op2 = load <64 x i16>, <64 x i16>* %b
+ %op1 = load <64 x i16>, ptr %a
+ %op2 = load <64 x i16>, ptr %b
%ret = shufflevector <64 x i16> %op1, <64 x i16> %op2, <64 x i32> <i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70,
                                                                   i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78,
                                                                   i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86,
                                                                   i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94,
                                                                   i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102,
                                                                   i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110,
                                                                   i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118,
                                                                   i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126>
- store <64 x i16> %ret, <64 x i16>* %a
+ store <64 x i16> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
+define void @shuffle_ext_byone_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: insr z1.h, w8
; CHECK-NEXT: st1h { z1.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x i16>, <128 x i16>* %a
- %op2 = load <128 x i16>, <128 x i16>* %b
+ %op1 = load <128 x i16>, ptr %a
+ %op2 = load <128 x i16>, ptr %b
%ret = shufflevector <128 x i16> %op1, <128 x i16> %op2, <128 x i32> <i32 127, i32 128, i32 129, i32 130, i32 131, i32 132, i32 133, i32 134,
                                                                      i32 135, i32 136, i32 137, i32 138, i32 139, i32 140, i32 141, i32 142,
                                                                      i32 143, i32 144, i32 145, i32 146, i32 147, i32 148, i32 149, i32 150,
                                                                      i32 151, i32 152, i32 153, i32 154, i32 155, i32 156, i32 157, i32 158,
                                                                      i32 159, i32 160, i32 161, i32 162, i32 163, i32 164, i32 165, i32 166,
                                                                      i32 167, i32 168, i32 169, i32 170, i32 171, i32 172, i32 173, i32 174,
                                                                      i32 175, i32 176, i32 177, i32 178, i32 179, i32 180, i32 181, i32 182,
                                                                      i32 183, i32 184, i32 185, i32 186, i32 187, i32 188, i32 189, i32 190,
                                                                      i32 191, i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198,
                                                                      i32 199, i32 200, i32 201, i32 202, i32 203, i32 204, i32 205, i32 206,
                                                                      i32 207, i32 208, i32 209, i32 210, i32 211, i32 212, i32 213, i32 214,
                                                                      i32 215, i32 216, i32 217, i32 218, i32 219, i32 220, i32 221, i32 222,
                                                                      i32 223, i32 224, i32 225, i32 226, i32 227, i32 228, i32 229, i32 230,
                                                                      i32 231, i32 232, i32 233, i32 234, i32 235, i32 236, i32 237, i32 238,
                                                                      i32 239, i32 240, i32 241, i32 242, i32 243, i32 244, i32 245, i32 246,
                                                                      i32 247, i32 248, i32 249, i32 250, i32 251, i32 252, i32 253, i32 254>
- store <128 x i16> %ret, <128 x i16>* %a
+ store <128 x i16> %ret, ptr %a
ret void
}
ret <4 x i32> %ret
}
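; i32 variants: identical splice-by-one structure, with the carried word
; inserted via "insr z1.s, w8".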
-define void @shuffle_ext_byone_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
+define void @shuffle_ext_byone_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: insr z1.s, w8
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x i32>, <8 x i32>* %a
- %op2 = load <8 x i32>, <8 x i32>* %b
+ %op1 = load <8 x i32>, ptr %a
+ %op2 = load <8 x i32>, ptr %b
%ret = shufflevector <8 x i32> %op1, <8 x i32> %op2, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
- store <8 x i32> %ret, <8 x i32>* %a
+ store <8 x i32> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
+define void @shuffle_ext_byone_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shuffle_ext_byone_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: insr z1.s, w8
; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x i32>, <16 x i32>* %a
- %op2 = load <16 x i32>, <16 x i32>* %b
+ %op1 = load <16 x i32>, ptr %a
+ %op2 = load <16 x i32>, ptr %b
%ret = shufflevector <16 x i32> %op1, <16 x i32> %op2, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22,
i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
- store <16 x i32> %ret, <16 x i32>* %a
+ store <16 x i32> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
+define void @shuffle_ext_byone_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: insr z1.s, w8
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i32>, <32 x i32>* %a
- %op2 = load <32 x i32>, <32 x i32>* %b
+ %op1 = load <32 x i32>, ptr %a
+ %op2 = load <32 x i32>, ptr %b
%ret = shufflevector <32 x i32> %op1, <32 x i32> %op2, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,
i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46,
i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54,
i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
- store <32 x i32> %ret, <32 x i32>* %a
+ store <32 x i32> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
+define void @shuffle_ext_byone_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: insr z1.s, w8
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x i32>, <64 x i32>* %a
- %op2 = load <64 x i32>, <64 x i32>* %b
+ %op1 = load <64 x i32>, ptr %a
+ %op2 = load <64 x i32>, ptr %b
%ret = shufflevector <64 x i32> %op1, <64 x i32> %op2, <64 x i32> <i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70,
                                                                   i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78,
                                                                   i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86,
                                                                   i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94,
                                                                   i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102,
                                                                   i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110,
                                                                   i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118,
                                                                   i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126>
- store <64 x i32> %ret, <64 x i32>* %a
+ store <64 x i32> %ret, ptr %a
ret void
}
ret <2 x i64> %ret
}
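; i64 variants: a doubleword no longer fits a W register, so the insert uses
; the X-register form "insr z1.d, x8".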
-define void @shuffle_ext_byone_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
+define void @shuffle_ext_byone_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: insr z1.d, x8
; CHECK-NEXT: st1d { z1.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x i64>, <4 x i64>* %a
- %op2 = load <4 x i64>, <4 x i64>* %b
+ %op1 = load <4 x i64>, ptr %a
+ %op2 = load <4 x i64>, ptr %b
%ret = shufflevector <4 x i64> %op1, <4 x i64> %op2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
- store <4 x i64> %ret, <4 x i64>* %a
+ store <4 x i64> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
+define void @shuffle_ext_byone_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shuffle_ext_byone_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: insr z1.d, x8
; VBITS_GE_512-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x i64>, <8 x i64>* %a
- %op2 = load <8 x i64>, <8 x i64>* %b
+ %op1 = load <8 x i64>, ptr %a
+ %op2 = load <8 x i64>, ptr %b
%ret = shufflevector <8 x i64> %op1, <8 x i64> %op2, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
- store <8 x i64> %ret, <8 x i64>* %a
+ store <8 x i64> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
+define void @shuffle_ext_byone_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: insr z1.d, x8
; CHECK-NEXT: st1d { z1.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x i64>, <16 x i64>* %a
- %op2 = load <16 x i64>, <16 x i64>* %b
+ %op1 = load <16 x i64>, ptr %a
+ %op2 = load <16 x i64>, ptr %b
%ret = shufflevector <16 x i64> %op1, <16 x i64> %op2, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22,
i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
- store <16 x i64> %ret, <16 x i64>* %a
+ store <16 x i64> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
+define void @shuffle_ext_byone_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: insr z1.d, x8
; CHECK-NEXT: st1d { z1.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x i64>, <32 x i64>* %a
- %op2 = load <32 x i64>, <32 x i64>* %b
+ %op1 = load <32 x i64>, ptr %a
+ %op2 = load <32 x i64>, ptr %b
%ret = shufflevector <32 x i64> %op1, <32 x i64> %op2, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,
i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46,
i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54,
i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
- store <32 x i64> %ret, <32 x i64>* %a
+ store <32 x i64> %ret, ptr %a
ret void
}
ret <8 x half> %ret
}
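; Floating-point variants. Here the carried lane can stay in a FP/SIMD
; register, so INSR takes its scalar-register form ("insr z1.h, h0") and no
; GPR transfer is needed.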
-define void @shuffle_ext_byone_v16f16(<16 x half>* %a, <16 x half>* %b) vscale_range(2,0) #0 {
+define void @shuffle_ext_byone_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: insr z1.h, h0
; CHECK-NEXT: st1h { z1.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x half>, <16 x half>* %a
- %op2 = load <16 x half>, <16 x half>* %b
+ %op1 = load <16 x half>, ptr %a
+ %op2 = load <16 x half>, ptr %b
%ret = shufflevector <16 x half> %op1, <16 x half> %op2, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22,
i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
- store <16 x half> %ret, <16 x half>* %a
+ store <16 x half> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
+define void @shuffle_ext_byone_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shuffle_ext_byone_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
; VBITS_GE_512-NEXT: insr z1.h, h0
; VBITS_GE_512-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <32 x half>, <32 x half>* %a
- %op2 = load <32 x half>, <32 x half>* %b
+ %op1 = load <32 x half>, ptr %a
+ %op2 = load <32 x half>, ptr %b
%ret = shufflevector <32 x half> %op1, <32 x half> %op2, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,
i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46,
i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54,
i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
- store <32 x half> %ret, <32 x half>* %a
+ store <32 x half> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v64f16(<64 x half>* %a, <64 x half>* %b) vscale_range(8,0) #0 {
+define void @shuffle_ext_byone_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: insr z1.h, h0
; CHECK-NEXT: st1h { z1.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x half>, <64 x half>* %a
- %op2 = load <64 x half>, <64 x half>* %b
+ %op1 = load <64 x half>, ptr %a
+ %op2 = load <64 x half>, ptr %b
%ret = shufflevector <64 x half> %op1, <64 x half> %op2, <64 x i32> <i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70,
                                                                     i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78,
                                                                     i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86,
                                                                     i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94,
                                                                     i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102,
                                                                     i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110,
                                                                     i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118,
                                                                     i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126>
- store <64 x half> %ret, <64 x half>* %a
+ store <64 x half> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v128f16(<128 x half>* %a, <128 x half>* %b) vscale_range(16,0) #0 {
+define void @shuffle_ext_byone_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: insr z1.h, h0
; CHECK-NEXT: st1h { z1.h }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <128 x half>, <128 x half>* %a
- %op2 = load <128 x half>, <128 x half>* %b
+ %op1 = load <128 x half>, ptr %a
+ %op2 = load <128 x half>, ptr %b
%ret = shufflevector <128 x half> %op1, <128 x half> %op2, <128 x i32> <i32 127, i32 128, i32 129, i32 130, i32 131, i32 132, i32 133, i32 134,
                                                                        i32 135, i32 136, i32 137, i32 138, i32 139, i32 140, i32 141, i32 142,
                                                                        i32 143, i32 144, i32 145, i32 146, i32 147, i32 148, i32 149, i32 150,
                                                                        i32 151, i32 152, i32 153, i32 154, i32 155, i32 156, i32 157, i32 158,
                                                                        i32 159, i32 160, i32 161, i32 162, i32 163, i32 164, i32 165, i32 166,
                                                                        i32 167, i32 168, i32 169, i32 170, i32 171, i32 172, i32 173, i32 174,
                                                                        i32 175, i32 176, i32 177, i32 178, i32 179, i32 180, i32 181, i32 182,
                                                                        i32 183, i32 184, i32 185, i32 186, i32 187, i32 188, i32 189, i32 190,
                                                                        i32 191, i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198,
                                                                        i32 199, i32 200, i32 201, i32 202, i32 203, i32 204, i32 205, i32 206,
                                                                        i32 207, i32 208, i32 209, i32 210, i32 211, i32 212, i32 213, i32 214,
                                                                        i32 215, i32 216, i32 217, i32 218, i32 219, i32 220, i32 221, i32 222,
                                                                        i32 223, i32 224, i32 225, i32 226, i32 227, i32 228, i32 229, i32 230,
                                                                        i32 231, i32 232, i32 233, i32 234, i32 235, i32 236, i32 237, i32 238,
                                                                        i32 239, i32 240, i32 241, i32 242, i32 243, i32 244, i32 245, i32 246,
                                                                        i32 247, i32 248, i32 249, i32 250, i32 251, i32 252, i32 253, i32 254>
- store <128 x half> %ret, <128 x half>* %a
+ store <128 x half> %ret, ptr %a
ret void
}
ret <4 x float> %ret
}
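; f32 variants: as for f16, but inserting via "insr z1.s, s0".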
-define void @shuffle_ext_byone_v8f32(<8 x float>* %a, <8 x float>* %b) vscale_range(2,0) #0 {
+define void @shuffle_ext_byone_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: insr z1.s, s0
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <8 x float>, <8 x float>* %a
- %op2 = load <8 x float>, <8 x float>* %b
+ %op1 = load <8 x float>, ptr %a
+ %op2 = load <8 x float>, ptr %b
%ret = shufflevector <8 x float> %op1, <8 x float> %op2, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
- store <8 x float> %ret, <8 x float>* %a
+ store <8 x float> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
+define void @shuffle_ext_byone_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shuffle_ext_byone_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
; VBITS_GE_512-NEXT: insr z1.s, s0
; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <16 x float>, <16 x float>* %a
- %op2 = load <16 x float>, <16 x float>* %b
+ %op1 = load <16 x float>, ptr %a
+ %op2 = load <16 x float>, ptr %b
%ret = shufflevector <16 x float> %op1, <16 x float> %op2, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22,
i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
- store <16 x float> %ret, <16 x float>* %a
+ store <16 x float> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v32f32(<32 x float>* %a, <32 x float>* %b) vscale_range(8,0) #0 {
+define void @shuffle_ext_byone_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: insr z1.s, s0
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x float>, <32 x float>* %a
- %op2 = load <32 x float>, <32 x float>* %b
+ %op1 = load <32 x float>, ptr %a
+ %op2 = load <32 x float>, ptr %b
%ret = shufflevector <32 x float> %op1, <32 x float> %op2, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,
i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46,
i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54,
i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
- store <32 x float> %ret, <32 x float>* %a
+ store <32 x float> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v64f32(<64 x float>* %a, <64 x float>* %b) vscale_range(16,0) #0 {
+define void @shuffle_ext_byone_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: insr z1.s, s0
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <64 x float>, <64 x float>* %a
- %op2 = load <64 x float>, <64 x float>* %b
+ %op1 = load <64 x float>, ptr %a
+ %op2 = load <64 x float>, ptr %b
%ret = shufflevector <64 x float> %op1, <64 x float> %op2, <64 x i32> <i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70,
                                                                       i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78,
                                                                       i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86,
                                                                       i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94,
                                                                       i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102,
                                                                       i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110,
                                                                       i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118,
                                                                       i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126>
- store <64 x float> %ret, <64 x float>* %a
+ store <64 x float> %ret, ptr %a
ret void
}
ret <2 x double> %ret
}
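; f64 variants: as above, inserting via "insr z1.d, d0".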
-define void @shuffle_ext_byone_v4f64(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @shuffle_ext_byone_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: insr z1.d, d0
; CHECK-NEXT: st1d { z1.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
- store <4 x double> %ret, <4 x double>* %a
+ store <4 x double> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
+define void @shuffle_ext_byone_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: shuffle_ext_byone_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
; VBITS_GE_512-NEXT: insr z1.d, d0
; VBITS_GE_512-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
- %op1 = load <8 x double>, <8 x double>* %a
- %op2 = load <8 x double>, <8 x double>* %b
+ %op1 = load <8 x double>, ptr %a
+ %op2 = load <8 x double>, ptr %b
%ret = shufflevector <8 x double> %op1, <8 x double> %op2, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
- store <8 x double> %ret, <8 x double>* %a
+ store <8 x double> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v16f64(<16 x double>* %a, <16 x double>* %b) vscale_range(8,0) #0 {
+define void @shuffle_ext_byone_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: insr z1.d, d0
; CHECK-NEXT: st1d { z1.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <16 x double>, <16 x double>* %a
- %op2 = load <16 x double>, <16 x double>* %b
+ %op1 = load <16 x double>, ptr %a
+ %op2 = load <16 x double>, ptr %b
%ret = shufflevector <16 x double> %op1, <16 x double> %op2, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22,
i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
- store <16 x double> %ret, <16 x double>* %a
+ store <16 x double> %ret, ptr %a
ret void
}
-define void @shuffle_ext_byone_v32f64(<32 x double>* %a, <32 x double>* %b) vscale_range(16,0) #0 {
+define void @shuffle_ext_byone_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: insr z1.d, d0
; CHECK-NEXT: st1d { z1.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <32 x double>, <32 x double>* %a
- %op2 = load <32 x double>, <32 x double>* %b
+ %op1 = load <32 x double>, ptr %a
+ %op2 = load <32 x double>, ptr %b
%ret = shufflevector <32 x double> %op1, <32 x double> %op2, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,
i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46,
i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54,
i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
- store <32 x double> %ret, <32 x double>* %a
+ store <32 x double> %ret, ptr %a
ret void
}
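; Reversed splice: the mask <7, 0, 1, 2> takes the last element of %op2
; followed by the leading elements of %op1, so the operand roles swap and the
; insert becomes "insr z0.d, d1".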
-define void @shuffle_ext_byone_reverse(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @shuffle_ext_byone_reverse(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shuffle_ext_byone_reverse:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: insr z0.d, d1
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
- store <4 x double> %ret, <4 x double>* %a
+ store <4 x double> %ret, ptr %a
ret void
}
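; Negative test: <2, 3, 4, 5> splices by two elements rather than one, so it
; cannot be matched to INSR; judging by the frame setup and teardown in the
; CHECK lines, the shuffle is instead expanded through a stack temporary.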
-define void @shuffle_ext_invalid(<4 x double>* %a, <4 x double>* %b) vscale_range(2,0) #0 {
+define void @shuffle_ext_invalid(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: shuffle_ext_invalid:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
- %op1 = load <4 x double>, <4 x double>* %a
- %op2 = load <4 x double>, <4 x double>* %b
+ %op1 = load <4 x double>, ptr %a
+ %op2 = load <4 x double>, ptr %b
%ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- store <4 x double> %ret, <4 x double>* %a
+ store <4 x double> %ret, ptr %a
ret void
}