From 66bd7ebdf76ab1758469145a5194b6fa833dd3a9 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Mon, 24 Jan 2022 12:35:18 +0000 Subject: [PATCH] [SVE] Use DUPM to handle more splat immediate cases. NOTE: Only considers i64 based vectors at this time because smaller element types require extra isel operand parsing. Differential Revision: https://reviews.llvm.org/D118040 --- llvm/lib/Target/AArch64/SVEInstrFormats.td | 3 +++ llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll | 15 +++++---------- .../CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll | 9 +++------ llvm/test/CodeGen/AArch64/sve-vector-splat.ll | 21 +++++++++++++++------ llvm/test/CodeGen/AArch64/sve-vselect-imm.ll | 10 ++++------ llvm/test/CodeGen/AArch64/sve2-int-mul.ll | 3 +-- 6 files changed, 31 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 466c785..359f5af 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1708,6 +1708,9 @@ multiclass sve_int_dup_mask_imm { (!cast(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>; def : InstAlias<"mov $Zd, $imm", (!cast(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>; + + def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))), + (!cast(NAME) logical_imm64:$imm)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll index 4a355e9..3f0aa28 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll @@ -133,9 +133,8 @@ define @smax_i64_neg( %a) { define @smax_i64_out_of_range( %a) { ; CHECK-LABEL: smax_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret 
%elt = insertelement undef, i64 65535, i32 0 @@ -277,9 +276,8 @@ define @smin_i64_neg( %a) { define @smin_i64_out_of_range( %a) { ; CHECK-LABEL: smin_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 65535, i32 0 @@ -385,9 +383,8 @@ define @umax_i64_pos( %a) { define @umax_i64_out_of_range( %a) { ; CHECK-LABEL: umax_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 65535, i32 0 @@ -493,9 +490,8 @@ define @umin_i64_pos( %a) { define @umin_i64_out_of_range( %a) { ; CHECK-LABEL: umin_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 65535, i32 0 @@ -627,9 +623,8 @@ define @mul_i32_range( %a) { define @mul_i64_range( %a) { ; CHECK-LABEL: mul_i64_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.d, #255 // =0xff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll index 30309db..8b57622 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -514,9 +514,8 @@ define @smax_i64( %a) { define @smax_i64_out_of_range( %a) { ; CHECK-LABEL: smax_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ptrue p0.d -; 
CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -832,9 +831,8 @@ define @umax_i64( %a) { define @umax_i64_out_of_range( %a) { ; CHECK-LABEL: umax_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -991,9 +989,8 @@ define @umin_i64( %a) { define @umin_i64_out_of_range( %a) { ; CHECK-LABEL: umin_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll index 5882173..641ee2f 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -73,8 +73,8 @@ define @sve_splat_4xi32_imm() { ret %splat } -define @sve_splat_2xi64_imm() { -; CHECK-LABEL: sve_splat_2xi64_imm: +define @sve_splat_2xi64_dup_imm() { +; CHECK-LABEL: sve_splat_2xi64_dup_imm: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.d, #1 // =0x1 ; CHECK-NEXT: ret @@ -83,6 +83,16 @@ define @sve_splat_2xi64_imm() { ret %splat } +define @sve_splat_2xi64_dupm_imm() { +; CHECK-LABEL: sve_splat_2xi64_dupm_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0xffff00000000 +; CHECK-NEXT: ret + %ins = insertelement undef, i64 281470681743360, i32 0 ; 0xffff00000000 + %splat = shufflevector %ins, undef, zeroinitializer + ret %splat +} + ;; Promote splats of smaller illegal integer vector types define @sve_splat_2xi8(i8 %val) { @@ -173,8 +183,7 @@ define @sve_splat_2xi32(i32 %val) { define @sve_splat_2xi32_imm() { 
; CHECK-LABEL: sve_splat_2xi32_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: mov z0.d, #0xffffffff ; CHECK-NEXT: ret %ins = insertelement undef, i32 -1, i32 0 %splat = shufflevector %ins, undef, zeroinitializer @@ -530,9 +539,9 @@ define @splat_nxv4f32_imm_out_of_range() { define @splat_nxv2f64_imm_out_of_range() { ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI50_0 +; CHECK-NEXT: adrp x8, .LCPI51_0 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: add x8, x8, :lo12:.LCPI50_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI51_0 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %1 = insertelement undef, double 3.33, i32 0 diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll index 63d95f3..18024eb 100644 --- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll @@ -144,10 +144,9 @@ ret %sel define @sel_64_illegal_wrong_extension( %p) { ; CHECK-LABEL: sel_64_illegal_wrong_extension: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 -; CHECK-NEXT: mov z1.d, #0 // =0x0 -; CHECK-NEXT: mov z0.d, x8 -; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: mov z0.d, #0 // =0x0 +; CHECK-NEXT: mov z1.d, #128 // =0x80 +; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %vec = shufflevector insertelement ( undef, i64 128, i32 0), zeroinitializer, zeroinitializer %sel = select %p, %vec, zeroinitializer @@ -370,8 +369,7 @@ ret %sel define @sel_merge_64_illegal_wrong_extension( %p, %in) { ; CHECK-LABEL: sel_merge_64_illegal_wrong_extension: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 -; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z1.d, #128 // =0x80 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %vec = shufflevector insertelement ( undef, i64 128, i32 0), zeroinitializer, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll index 
57d5775..4bf30e1 100644 --- a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll +++ b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll @@ -59,8 +59,7 @@ define @mul_i32_imm_neg( %a) { define @mul_i64_imm( %a) { ; CHECK-LABEL: mul_i64_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z1.d, #255 // =0xff ; CHECK-NEXT: mul z0.d, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 -- 2.7.4