; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LD1R
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+no-sve-fp-ld1r < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-LD1R
;
; Check that an ld1r* instruction is generated to splat a scalar during the load,
; rather than a mov from a scalar to a vector register (which would require the vector unit).
}
; Splat of a half loaded from %valp across a <vscale x 8 x half> vector.
; CHECK-LD1R expects ptrue + ld1rh; CHECK-NO-LD1R expects a scalar ldr
; followed by a mov that broadcasts h0 into z0.h.
define <vscale x 8 x half> @ld1rh_half(ptr %valp) {
-; CHECK-LABEL: ld1rh_half:
+; CHECK-LD1R-LABEL: ld1rh_half:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.h
+; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
+ %val = load half, ptr %valp
+ %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
+ %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %shf
+}
+
+define <vscale x 8 x half> @ld1rh_half_neoverse(ptr %valp) #1 {
+; CHECK-LABEL: ld1rh_half_neoverse:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%val = load half, ptr %valp
%ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
}
define <vscale x 8 x half> @ld1rh_half_gep(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_gep:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0, #126]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_gep:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.h
+; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x0, #126]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_gep:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #126]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr half, ptr %valp, i32 63
%val = load half, ptr %valp2
%ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
}
define <vscale x 8 x half> @ld1rh_half_gep_out_of_range_up(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_gep_out_of_range_up:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #128
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_gep_out_of_range_up:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: add x8, x0, #128
+; CHECK-LD1R-NEXT: ptrue p0.h
+; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_gep_out_of_range_up:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #128]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr half, ptr %valp, i32 64
%val = load half, ptr %valp2
%ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
}
define <vscale x 8 x half> @ld1rh_half_gep_out_of_range_down(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_gep_out_of_range_down:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x0, #2
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_gep_out_of_range_down:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: sub x8, x0, #2
+; CHECK-LD1R-NEXT: ptrue p0.h
+; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_gep_out_of_range_down:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldur h0, [x0, #-2]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr half, ptr %valp, i32 -1
%val = load half, ptr %valp2
%ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
}
define <vscale x 4 x half> @ld1rh_half_unpacked4(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_unpacked4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_unpacked4:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.s
+; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%val = load half, ptr %valp
%ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
%shf = shufflevector <vscale x 4 x half> %ins, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
}
define <vscale x 4 x half> @ld1rh_half_unpacked4_gep(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_unpacked4_gep:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0, #126]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_unpacked4_gep:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.s
+; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x0, #126]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4_gep:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #126]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr half, ptr %valp, i32 63
%val = load half, ptr %valp2
%ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
}
define <vscale x 4 x half> @ld1rh_half_unpacked4_gep_out_of_range_up(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #128
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: add x8, x0, #128
+; CHECK-LD1R-NEXT: ptrue p0.s
+; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #128]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr half, ptr %valp, i32 64
%val = load half, ptr %valp2
%ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
}
define <vscale x 4 x half> @ld1rh_half_unpacked4_gep_out_of_range_down(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x0, #2
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: sub x8, x0, #2
+; CHECK-LD1R-NEXT: ptrue p0.s
+; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldur h0, [x0, #-2]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr half, ptr %valp, i32 -1
%val = load half, ptr %valp2
%ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
}
define <vscale x 2 x half> @ld1rh_half_unpacked2(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_unpacked2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_unpacked2:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%val = load half, ptr %valp
%ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
%shf = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
}
define <vscale x 2 x half> @ld1rh_half_unpacked2_gep(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_unpacked2_gep:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0, #126]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_unpacked2_gep:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x0, #126]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2_gep:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #126]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr half, ptr %valp, i32 63
%val = load half, ptr %valp2
%ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
}
define <vscale x 2 x half> @ld1rh_half_unpacked2_gep_out_of_range_up(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #128
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: add x8, x0, #128
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0, #128]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr half, ptr %valp, i32 64
%val = load half, ptr %valp2
%ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
}
define <vscale x 2 x half> @ld1rh_half_unpacked2_gep_out_of_range_down(ptr %valp) {
-; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x0, #2
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: sub x8, x0, #2
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldur h0, [x0, #-2]
+; CHECK-NO-LD1R-NEXT: mov z0.h, h0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr half, ptr %valp, i32 -1
%val = load half, ptr %valp2
%ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
}
define <vscale x 4 x float> @ld1rw_float(ptr %valp) {
-; CHECK-LABEL: ld1rw_float:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rw_float:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.s
+; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rw_float:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr s0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.s, s0
+; CHECK-NO-LD1R-NEXT: ret
%val = load float, ptr %valp
%ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
%shf = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
}
define <vscale x 4 x float> @ld1rw_float_gep(ptr %valp) {
-; CHECK-LABEL: ld1rw_float_gep:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0, #252]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rw_float_gep:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.s
+; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x0, #252]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rw_float_gep:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr s0, [x0, #252]
+; CHECK-NO-LD1R-NEXT: mov z0.s, s0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr float, ptr %valp, i32 63
%val = load float, ptr %valp2
%ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
}
define <vscale x 4 x float> @ld1rw_float_gep_out_of_range_up(ptr %valp) {
-; CHECK-LABEL: ld1rw_float_gep_out_of_range_up:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #256
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rw_float_gep_out_of_range_up:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: add x8, x0, #256
+; CHECK-LD1R-NEXT: ptrue p0.s
+; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rw_float_gep_out_of_range_up:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr s0, [x0, #256]
+; CHECK-NO-LD1R-NEXT: mov z0.s, s0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr float, ptr %valp, i32 64
%val = load float, ptr %valp2
%ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
}
define <vscale x 4 x float> @ld1rw_float_gep_out_of_range_down(ptr %valp) {
-; CHECK-LABEL: ld1rw_float_gep_out_of_range_down:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x0, #4
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rw_float_gep_out_of_range_down:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: sub x8, x0, #4
+; CHECK-LD1R-NEXT: ptrue p0.s
+; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rw_float_gep_out_of_range_down:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldur s0, [x0, #-4]
+; CHECK-NO-LD1R-NEXT: mov z0.s, s0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr float, ptr %valp, i32 -1
%val = load float, ptr %valp2
%ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
}
define <vscale x 2 x float> @ld1rw_float_unpacked2(ptr %valp) {
-; CHECK-LABEL: ld1rw_float_unpacked2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rw_float_unpacked2:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr s0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.s, s0
+; CHECK-NO-LD1R-NEXT: ret
%val = load float, ptr %valp
%ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
%shf = shufflevector <vscale x 2 x float> %ins, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
}
define <vscale x 2 x float> @ld1rw_float_unpacked2_gep(ptr %valp) {
-; CHECK-LABEL: ld1rw_float_unpacked2_gep:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0, #252]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rw_float_unpacked2_gep:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x0, #252]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2_gep:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr s0, [x0, #252]
+; CHECK-NO-LD1R-NEXT: mov z0.s, s0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr float, ptr %valp, i32 63
%val = load float, ptr %valp2
%ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
}
define <vscale x 2 x float> @ld1rw_float_unpacked2_gep_out_of_range_up(ptr %valp) {
-; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #256
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: add x8, x0, #256
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr s0, [x0, #256]
+; CHECK-NO-LD1R-NEXT: mov z0.s, s0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr float, ptr %valp, i32 64
%val = load float, ptr %valp2
%ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
}
define <vscale x 2 x float> @ld1rw_float_unpacked2_gep_out_of_range_down(ptr %valp) {
-; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x0, #4
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: sub x8, x0, #4
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldur s0, [x0, #-4]
+; CHECK-NO-LD1R-NEXT: mov z0.s, s0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr float, ptr %valp, i32 -1
%val = load float, ptr %valp2
%ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
}
define <vscale x 2 x double> @ld1rd_double(ptr %valp) {
-; CHECK-LABEL: ld1rd_double:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rd_double:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rd_double:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr d0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.d, d0
+; CHECK-NO-LD1R-NEXT: ret
%val = load double, ptr %valp
%ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
%shf = shufflevector <vscale x 2 x double> %ins, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
}
define <vscale x 2 x double> @ld1rd_double_gep(ptr %valp) {
-; CHECK-LABEL: ld1rd_double_gep:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0, #504]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rd_double_gep:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x0, #504]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rd_double_gep:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr d0, [x0, #504]
+; CHECK-NO-LD1R-NEXT: mov z0.d, d0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr double, ptr %valp, i32 63
%val = load double, ptr %valp2
%ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
}
define <vscale x 2 x double> @ld1rd_double_gep_out_of_range_up(ptr %valp) {
-; CHECK-LABEL: ld1rd_double_gep_out_of_range_up:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #512
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rd_double_gep_out_of_range_up:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: add x8, x0, #512
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rd_double_gep_out_of_range_up:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr d0, [x0, #512]
+; CHECK-NO-LD1R-NEXT: mov z0.d, d0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr double, ptr %valp, i32 64
%val = load double, ptr %valp2
%ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
}
define <vscale x 2 x double> @ld1rd_double_gep_out_of_range_down(ptr %valp) {
-; CHECK-LABEL: ld1rd_double_gep_out_of_range_down:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x0, #8
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: ld1rd_double_gep_out_of_range_down:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: sub x8, x0, #8
+; CHECK-LD1R-NEXT: ptrue p0.d
+; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x8]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: ld1rd_double_gep_out_of_range_down:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldur d0, [x0, #-8]
+; CHECK-NO-LD1R-NEXT: mov z0.d, d0
+; CHECK-NO-LD1R-NEXT: ret
%valp2 = getelementptr double, ptr %valp, i32 -1
%val = load double, ptr %valp2
%ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
ret <vscale x 2 x i64> %res
}
; Predicated llvm.aarch64.sve.dup of a loaded half with an undef passthru
; (<vscale x 8 x half>). CHECK-LD1R expects a single ld1rh; CHECK-NO-LD1R
; expects a scalar ldr followed by a merging predicated mov.
define <vscale x 8 x half> @dup_ld1rh_half_passthruundef_nxv8f16(<vscale x 8 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rh_half_passthruundef_nxv8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv8f16:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv8f16:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h0
+; CHECK-NO-LD1R-NEXT: ret
%ld = load half, ptr %addr
%res = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, half %ld)
ret <vscale x 8 x half> %res
}
; Predicated llvm.aarch64.sve.dup of a loaded float with an undef passthru
; (<vscale x 4 x float>). CHECK-LD1R expects a single ld1rw; CHECK-NO-LD1R
; expects a scalar ldr followed by a merging predicated mov.
define <vscale x 4 x float> @dup_ld1rs_float_passthruundef_nxv4f32(<vscale x 4 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rs_float_passthruundef_nxv4f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rs_float_passthruundef_nxv4f32:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rs_float_passthruundef_nxv4f32:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr s0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.s, p0/m, s0
+; CHECK-NO-LD1R-NEXT: ret
%ld = load float, ptr %addr
%res = call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, float %ld)
ret <vscale x 4 x float> %res
}
; Predicated llvm.aarch64.sve.dup of a loaded double with an undef passthru
; (<vscale x 2 x double>). CHECK-LD1R expects a single ld1rd; CHECK-NO-LD1R
; expects a scalar ldr followed by a merging predicated mov.
define <vscale x 2 x double> @dup_ld1rd_double_passthruundef_nxv2f64(<vscale x 2 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rd_double_passthruundef_nxv2f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rd_double_passthruundef_nxv2f64:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rd_double_passthruundef_nxv2f64:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr d0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.d, p0/m, d0
+; CHECK-NO-LD1R-NEXT: ret
%ld = load double, ptr %addr
%res = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, double %ld)
ret <vscale x 2 x double> %res
}
define <vscale x 4 x half> @dup_ld1rh_half_passthruundef_nxv4f16(<vscale x 4 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rh_half_passthruundef_nxv4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv4f16:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv4f16:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h0, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h0
+; CHECK-NO-LD1R-NEXT: ret
%ld = load half, ptr %addr
%res = call <vscale x 4 x half> @llvm.aarch64.sve.dup.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> %pg, half %ld)
ret <vscale x 4 x half> %res
ret <vscale x 2 x i64> %res
}
; Predicated llvm.aarch64.sve.dup of a loaded half with a zeroinitializer
; passthru (<vscale x 8 x half>). CHECK-LD1R expects a single zeroing ld1rh;
; CHECK-NO-LD1R expects ldr into h1, an explicit zero of z0.h, then a merging
; predicated mov.
define <vscale x 8 x half> @dup_ld1rh_half_passthruzero_nxv8f16(<vscale x 8 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rh_half_passthruzero_nxv8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv8f16:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rh { z0.h }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv8f16:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h1, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h1
+; CHECK-NO-LD1R-NEXT: ret
%ld = load half, ptr %addr
%res = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> %pg, half %ld)
ret <vscale x 8 x half> %res
}
; Predicated llvm.aarch64.sve.dup of a loaded float with a zeroinitializer
; passthru (<vscale x 4 x float>). CHECK-LD1R expects a single zeroing ld1rw;
; CHECK-NO-LD1R expects ldr into s1, an explicit zero of z0.s, then a merging
; predicated mov.
define <vscale x 4 x float> @dup_ld1rs_float_passthruzero_nxv4f32(<vscale x 4 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rs_float_passthruzero_nxv4f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv4f32:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rw { z0.s }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv4f32:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr s1, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NO-LD1R-NEXT: mov z0.s, p0/m, s1
+; CHECK-NO-LD1R-NEXT: ret
%ld = load float, ptr %addr
%res = call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, float %ld)
ret <vscale x 4 x float> %res
}
; Predicated llvm.aarch64.sve.dup of a loaded double with a zeroinitializer
; passthru (<vscale x 2 x double>). CHECK-LD1R expects a single zeroing ld1rd;
; CHECK-NO-LD1R expects ldr into d1, an explicit zero of z0.d, then a merging
; predicated mov.
define <vscale x 2 x double> @dup_ld1rd_double_passthruzero_nxv2f64(<vscale x 2 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rd_double_passthruzero_nxv2f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rd_double_passthruzero_nxv2f64:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rd { z0.d }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rd_double_passthruzero_nxv2f64:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr d1, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NO-LD1R-NEXT: mov z0.d, p0/m, d1
+; CHECK-NO-LD1R-NEXT: ret
%ld = load double, ptr %addr
%res = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, double %ld)
ret <vscale x 2 x double> %res
}
; Predicated llvm.aarch64.sve.dup of a loaded half with a zeroinitializer
; passthru on the unpacked <vscale x 4 x half> type. CHECK-LD1R expects a
; single ld1rh into .s-sized lanes; CHECK-NO-LD1R expects ldr into h1, a zero
; of z0.h, then a merging predicated mov.
define <vscale x 4 x half> @dup_ld1rh_half_passthruzero_nxv4f16(<vscale x 4 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rh_half_passthruzero_nxv4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv4f16:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rh { z0.s }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv4f16:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h1, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h1
+; CHECK-NO-LD1R-NEXT: ret
%ld = load half, ptr %addr
%res = call <vscale x 4 x half> @llvm.aarch64.sve.dup.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x i1> %pg, half %ld)
ret <vscale x 4 x half> %res
}
; Predicated llvm.aarch64.sve.dup of a loaded half with a zeroinitializer
; passthru on the unpacked <vscale x 2 x half> type. CHECK-LD1R expects a
; single ld1rh into .d-sized lanes; CHECK-NO-LD1R expects ldr into h1, a zero
; of z0.h, then a merging predicated mov.
define <vscale x 2 x half> @dup_ld1rh_half_passthruzero_nxv2f16(<vscale x 2 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rh_half_passthruzero_nxv2f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv2f16:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rh { z0.d }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv2f16:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr h1, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NO-LD1R-NEXT: mov z0.h, p0/m, h1
+; CHECK-NO-LD1R-NEXT: ret
%ld = load half, ptr %addr
%res = call <vscale x 2 x half> @llvm.aarch64.sve.dup.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x i1> %pg, half %ld)
ret <vscale x 2 x half> %res
}
define <vscale x 2 x float> @dup_ld1rs_float_passthruzero_nxv2f32(<vscale x 2 x i1> %pg, ptr %addr) {
-; CHECK-LABEL: dup_ld1rs_float_passthruzero_nxv2f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ret
+; CHECK-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv2f32:
+; CHECK-LD1R: // %bb.0:
+; CHECK-LD1R-NEXT: ld1rw { z0.d }, p0/z, [x0]
+; CHECK-LD1R-NEXT: ret
+;
+; CHECK-NO-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv2f32:
+; CHECK-NO-LD1R: // %bb.0:
+; CHECK-NO-LD1R-NEXT: ldr s1, [x0]
+; CHECK-NO-LD1R-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NO-LD1R-NEXT: mov z0.s, p0/m, s1
+; CHECK-NO-LD1R-NEXT: ret
%ld = load float, ptr %addr
%res = call <vscale x 2 x float> @llvm.aarch64.sve.dup.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x i1> %pg, float %ld)
ret <vscale x 2 x float> %res
attributes #0 = { "target-features"="+sve,+bf16" }
+attributes #1 = { "target-cpu"="neoverse-v1" }