defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>;
defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>;
- // Use more efficient NEON instructions to extract elements within the NEON
- // part (first 128bits) of an SVE register.
- def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
- (f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>;
- def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
- (f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>;
- def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
- (f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>;
-
// Splat immediate (unpredicated)
defm DUP_ZI : sve_int_dup_imm<"dup">;
defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;
(DUP_ZR_D $index)),
$src)>;
+ // Extract element from vector with scalar index
+ def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+
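+ // Note on the WHILELS/LASTB pairing above: WHILELS XZR, Xn yields a predicate
+ // that is active for lanes [0, n], so "last active element" is exactly lane n
+ // (for n past the vector length the extract is undefined anyway). A rough
+ // ACLE sketch of the same semantics, for illustration only (the function name
+ // is made up; the intrinsics are standard ACLE):
+ //
+ //   #include <arm_sve.h>
+ //   uint8_t extract_u8(svuint8_t vec, uint64_t idx) {
+ //     svbool_t pg = svwhilele_b8_u64(0, idx); // WHILELS: lanes 0..idx active
+ //     return svlastb_u8(pg, vec);             // LASTB: value of lane idx
+ //   }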
// Extract element from vector with immediate index
  def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)),
            (EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>;
  def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
            (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), ssub)>;
  def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
            (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
  def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
            (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
+ def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
+ def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
+ def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>;
def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
- // Extract element from vector with scalar index
- def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
- ZPR:$vec)>;
+ // Extract element from vector with immediate index that's within the bottom 128 bits.
+ let AddedComplexity = 1 in {
+ def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
+ (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
+ def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
+ (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
+ def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
+ (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
+ def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
+ (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
+ }
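+ // Illustrative note: these patterns rely on the low 128 bits of each Z
+ // register aliasing the corresponding NEON V register, so a constant lane
+ // index that fits in 128 bits can be read with an ordinary NEON lane move.
+ // Roughly, in ACLE terms (using the NEON-SVE bridge; a sketch, not the
+ // lowering itself, with an illustrative function name):
+ //
+ //   #include <arm_neon.h>
+ //   #include <arm_sve.h>
+ //   #include <arm_neon_sve_bridge.h>
+ //   uint8_t lane15_u8(svuint8_t z) {
+ //     uint8x16_t v = svget_neonq_u8(z); // low 128 bits, no data movement
+ //     return vgetq_lane_u8(v, 15);      // NEON lane read, cf. UMOVvi8
+ //   }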
- def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
- ZPR:$vec)>;
- def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
- ZPR:$vec)>;
+ // Extract first element from vector.
+ let AddedComplexity = 2 in {
+ def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)),
+ (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)),
+ (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)),
+ (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)),
+ (i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
+ def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
+ (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)),
+ (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
+ (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
+ }
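+ // Likewise, element 0 of a scalable vector occupies the same bits as the
+ // low H/S/D subregister of the overlapping V register, so extracting it is
+ // just a subregister read. Sketch in the same ACLE terms as above:
+ //
+ //   double first_f64(svfloat64_t z) {
+ //     return vgetq_lane_f64(svget_neonq_f64(z), 0); // element 0 lives in d0
+ //   }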
}
let Predicates = [HasSVE, HasMatMulInt8] in {
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: llc < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
-define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) {
+target triple = "aarch64-unknown-linux-gnu"
+
+define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: test_lane0_16xi8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.b, b0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 16 x i8> %a, i32 0
ret i8 %b
}
-define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) {
+define i8 @test_lane15_16xi8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: test_lane15_16xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w0, v0.b[15]
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 16 x i8> %a, i32 15
+ ret i8 %b
+}
+
+define i8 @test_lane16_16xi8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: test_lane16_16xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.b, z0.b[16]
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 16 x i8> %a, i32 16
+ ret i8 %b
+}
+
+define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: test_lane0_8xi16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 8 x i16> %a, i32 0
ret i16 %b
}
-define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) {
+define i16 @test_lane7_8xi16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: test_lane7_8xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w0, v0.h[7]
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 8 x i16> %a, i32 7
+ ret i16 %b
+}
+
+define i16 @test_lane8_8xi16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: test_lane8_8xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[8]
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 8 x i16> %a, i32 8
+ ret i16 %b
+}
+
+define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: test_lane0_4xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 0
ret i32 %b
}
-define i64 @test_lane0_2xi64(<vscale x 2 x i64> %a) {
+define i32 @test_lane3_4xi32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: test_lane3_4xi32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w0, v0.s[3]
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 4 x i32> %a, i32 3
+ ret i32 %b
+}
+
+define i32 @test_lane4_4xi32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: test_lane4_4xi32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[4]
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 4 x i32> %a, i32 4
+ ret i32 %b
+}
+
+define i64 @test_lane0_2xi64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: test_lane0_2xi64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, d0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%b = extractelement <vscale x 2 x i64> %a, i32 0
ret i64 %b
}
-define double @test_lane0_2xf64(<vscale x 2 x double> %a) {
-; CHECK-LABEL: test_lane0_2xf64:
+define i64 @test_lane1_2xi64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: test_lane1_2xi64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: mov x0, v0.d[1]
; CHECK-NEXT: ret
- %b = extractelement <vscale x 2 x double> %a, i32 0
- ret double %b
+ %b = extractelement <vscale x 2 x i64> %a, i32 1
+ ret i64 %b
}
-define float @test_lane0_4xf32(<vscale x 4 x float> %a) {
-; CHECK-LABEL: test_lane0_4xf32:
+define i64 @test_lane2_2xi64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: test_lane2_2xi64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: mov z0.d, z0.d[2]
+; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %b = extractelement <vscale x 4 x float> %a, i32 0
- ret float %b
+ %b = extractelement <vscale x 2 x i64> %a, i32 2
+ ret i64 %b
}
-define half @test_lane0_8xf16(<vscale x 8 x half> %a) {
+define half @test_lane0_8xf16(<vscale x 8 x half> %a) #0 {
; CHECK-LABEL: test_lane0_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
 %b = extractelement <vscale x 8 x half> %a, i32 0
 ret half %b
}
-define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
+define half @test_lane7_8xf16(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: test_lane7_8xf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[7]
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 8 x half> %a, i32 7
+ ret half %b
+}
+
+define half @test_lane8_8xf16(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: test_lane8_8xf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[8]
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 8 x half> %a, i32 8
+ ret half %b
+}
+
+define half @test_lane0_4xf16(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: test_lane0_4xf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 4 x half> %a, i32 0
+ ret half %b
+}
+
+define half @test_lane3_4xf16(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: test_lane3_4xf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[3]
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 4 x half> %a, i32 3
+ ret half %b
+}
+
+define half @test_lane4_4xf16(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: test_lane4_4xf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[4]
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 4 x half> %a, i32 4
+ ret half %b
+}
+
+define half @test_lane0_2xf16(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: test_lane0_2xf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x half> %a, i32 0
+ ret half %b
+}
+
+define half @test_lane1_2xf16(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: test_lane1_2xf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x half> %a, i32 1
+ ret half %b
+}
+
+define half @test_lane2_2xf16(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: test_lane2_2xf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[2]
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x half> %a, i32 2
+ ret half %b
+}
+
+define float @test_lane0_4xf32(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: test_lane0_4xf32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 4 x float> %a, i32 0
+ ret float %b
+}
+
+define float @test_lane3_4xf32(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: test_lane3_4xf32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[3]
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 4 x float> %a, i32 3
+ ret float %b
+}
+
+define float @test_lane4_4xf32(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: test_lane4_4xf32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[4]
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 4 x float> %a, i32 4
+ ret float %b
+}
+
+define float @test_lane0_2xf32(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: test_lane0_2xf32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x float> %a, i32 0
+ ret float %b
+}
+
+define float @test_lane1_2xf32(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: test_lane1_2xf32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x float> %a, i32 1
+ ret float %b
+}
+
+define float @test_lane2_2xf32(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: test_lane2_2xf32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[2]
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x float> %a, i32 2
+ ret float %b
+}
+
+define double @test_lane0_2xf64(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: test_lane0_2xf64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x double> %a, i32 0
+ ret double %b
+}
+
+define double @test_lane1_2xf64(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: test_lane1_2xf64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x double> %a, i32 1
+ ret double %b
+}
+
+define double @test_lane2_2xf64(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: test_lane2_2xf64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[2]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x double> %a, i32 2
+ ret double %b
+}
+
+define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.b, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.b
; CHECK-NEXT: ret
 %b = extractelement <vscale x 16 x i8> %a, i32 %x
 ret i8 %b
}
-define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) {
+define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.h, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.h
; CHECK-NEXT: ret
 %b = extractelement <vscale x 8 x i16> %a, i32 %x
 ret i16 %b
}
-define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) {
+define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.s
; CHECK-NEXT: ret
 %b = extractelement <vscale x 4 x i32> %a, i32 %x
 ret i32 %b
}
-define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) {
+define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb x0, p0, z0.d
; CHECK-NEXT: ret
 %b = extractelement <vscale x 2 x i64> %a, i32 %x
 ret i64 %b
}
-define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) {
-; CHECK-LABEL: test_lanex_2xf64:
+define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: whilels p0.d, xzr, x8
-; CHECK-NEXT: lastb d0, p0, z0.d
+; CHECK-NEXT: whilels p0.h, xzr, x8
+; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
- %b = extractelement <vscale x 2 x double> %a, i32 %x
- ret double %b
+ %b = extractelement <vscale x 8 x half> %a, i32 %x
+ ret half %b
}
-define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) {
-; CHECK-LABEL: test_lanex_4xf32:
+define half @test_lanex_4xf16(<vscale x 4 x half> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_4xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
-; CHECK-NEXT: lastb s0, p0, z0.s
+; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
- %b = extractelement <vscale x 4 x float> %a, i32 %x
- ret float %b
+ %b = extractelement <vscale x 4 x half> %a, i32 %x
+ ret half %b
}
-define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) {
-; CHECK-LABEL: test_lanex_8xf16:
+define half @test_lanex_2xf16(<vscale x 2 x half> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_2xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: whilels p0.h, xzr, x8
+; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
- %b = extractelement <vscale x 8 x half> %a, i32 %x
+ %b = extractelement <vscale x 2 x half> %a, i32 %x
ret half %b
}
-; Deliberately choose an index that is out-of-bounds
-define i8 @test_lane64_16xi8(<vscale x 16 x i8> %a) {
-; CHECK-LABEL: test_lane64_16xi8:
+define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_4xf32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64
-; CHECK-NEXT: whilels p0.b, xzr, x8
-; CHECK-NEXT: lastb w0, p0, z0.b
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: whilels p0.s, xzr, x8
+; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
- %b = extractelement <vscale x 16 x i8> %a, i32 64
- ret i8 %b
+ %b = extractelement <vscale x 4 x float> %a, i32 %x
+ ret float %b
+}
+
+define float @test_lanex_2xf32(<vscale x 2 x float> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_2xf32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: whilels p0.d, xzr, x8
+; CHECK-NEXT: lastb s0, p0, z0.s
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 2 x float> %a, i32 %x
+ ret float %b
}
-define double @test_lane9_2xf64(<vscale x 2 x double> %a) {
-; CHECK-LABEL: test_lane9_2xf64:
+define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_2xf64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #9
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: ret
- %b = extractelement <vscale x 2 x double> %a, i32 9
+ %b = extractelement <vscale x 2 x double> %a, i32 %x
ret double %b
}
; Deliberately choose an index that is undefined
-define i32 @test_lane64_4xi32(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: test_lane64_4xi32:
+define i32 @test_undef_lane_4xi32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: test_undef_lane_4xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 undef
ret i32 %b
}
-define i8 @extract_of_insert_undef_16xi8(i8 %a) {
+define i8 @extract_of_insert_undef_16xi8(i8 %a) #0 {
; CHECK-LABEL: extract_of_insert_undef_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
 %b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
 %c = extractelement <vscale x 16 x i8> %b, i32 0
 ret i8 %c
}
-define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) {
+define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
; CHECK-LABEL: extract0_of_insert0_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
 %c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 0
 %d = extractelement <vscale x 16 x i8> %c, i32 0
 ret i8 %d
}
-define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) {
+define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
; CHECK-LABEL: extract64_of_insert64_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
 %c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 64
 %d = extractelement <vscale x 16 x i8> %c, i32 64
 ret i8 %d
}
-define i8 @extract_of_insert_diff_lanes_16xi8(<vscale x 16 x i8> %a, i8 %b) {
+define i8 @extract_of_insert_diff_lanes_16xi8(<vscale x 16 x i8> %a, i8 %b) #0 {
; CHECK-LABEL: extract_of_insert_diff_lanes_16xi8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.b, z0.b[3]
-; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: umov w0, v0.b[3]
; CHECK-NEXT: ret
%c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 0
%d = extractelement <vscale x 16 x i8> %c, i32 3
ret i8 %d
}
-define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) {
+define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: test_lane0_zero_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, wzr
; The DAG combiner should fold the extract of a splat to give element zero
; of the splat, i.e. %x. If the index is beyond the end of the scalable
; vector the result is undefined anyway.
-define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) {
+define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) #0 {
; CHECK-LABEL: test_lanex_splat_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%c = extractelement <vscale x 2 x i64> %b, i32 %y
ret i64 %c
}
+
+attributes #0 = { "target-features"="+sve" }