From 2040d20df2e22eefaa6b653e505a2bcf3499e786 Mon Sep 17 00:00:00 2001
From: Bradley Smith
Date: Thu, 22 Apr 2021 12:15:43 +0100
Subject: [PATCH] [AArch64][SVE] Add missing patterns for scalar versions of
 SQSHL/UQSHL

Differential Revision: https://reviews.llvm.org/D101058
---
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td     |  10 +-
 .../CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll | 197 +++++++++++++++++++++
 2 files changed, 202 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c2d4644..549fbe8 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2532,11 +2532,11 @@ let Predicates = [HasSVE2] in {
   }
 
   // SVE2 predicated shifts
-  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">;
-  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">;
-  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>;
-  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
-  defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
+  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left_dup<0b0110, "sqshl",  "SQSHL_ZPZI",  int_aarch64_sve_sqshl>;
+  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left_dup<0b0111, "uqshl",  "UQSHL_ZPZI",  int_aarch64_sve_uqshl>;
+  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<   0b1100, "srshr",  "SRSHR_ZPZI",  int_aarch64_sve_srshr>;
+  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<   0b1101, "urshr",  "URSHR_ZPZI",  int_aarch64_sve_urshr>;
+  defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left<    0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
 
   // SVE2 integer add/subtract long
   defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
index 2151eed..30e521c 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -751,6 +751,102 @@ define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 }
 
 ;
+; SQSHL (Scalar)
+;
+
+define <vscale x 16 x i8> @sqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqshl_n_i8:
+; CHECK: sqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: ret
+  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %dup)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshl_n_i16:
+; CHECK: sqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: ret
+  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %dup)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshl_n_i32:
+; CHECK: sqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %dup)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshl_n_i64:
+; CHECK: sqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: ret
+  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %dup)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @sqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqshl_n_i8_range:
+; CHECK: mov z1.b, #8
+; CHECK: sqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %dup)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshl_n_i16_range:
+; CHECK: mov z1.h, #16
+; CHECK: sqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %dup)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshl_n_i32_range:
+; CHECK: mov z1.s, #32
+; CHECK: sqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %dup)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshl_n_i64_range:
+; CHECK: mov z1.d, #64
+; CHECK: sqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %dup)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; SQSHLU
 ;
 
@@ -1499,6 +1595,102 @@ define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
 }
 
 ;
+; UQSHL (Scalar)
+;
+
+define <vscale x 16 x i8> @uqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uqshl_n_i8:
+; CHECK: uqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: ret
+  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %dup)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshl_n_i16:
+; CHECK: uqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: ret
+  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %dup)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshl_n_i32:
+; CHECK: uqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %dup)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshl_n_i64:
+; CHECK: uqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: ret
+  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %dup)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @uqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uqshl_n_i8_range:
+; CHECK: mov z1.b, #8
+; CHECK: uqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %dup)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshl_n_i16_range:
+; CHECK: mov z1.h, #16
+; CHECK: uqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %dup)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshl_n_i32_range:
+; CHECK: mov z1.s, #32
+; CHECK: uqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %dup)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshl_n_i64_range:
+; CHECK: mov z1.d, #64
+; CHECK: uqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %dup)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; UQSUB
 ;
 
@@ -1878,6 +2070,11 @@ define <vscale x 2 x i64> @usra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
   ret <vscale x 2 x i64> %out
 }
 
+declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-- 
2.7.4