From 2c8546107a91a7b9a31791452712676937df54fe Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@quicinc.com>
Date: Wed, 29 Apr 2020 19:05:32 -0700
Subject: [PATCH] [AArch64][SVE] Implement lowering for SIGN_EXTEND etc. of
 SVE predicates.

Now using patterns, since there's a single-instruction lowering. (We
could convert to VSELECT and pattern-match that, but there doesn't seem
to be much point.)

I think this might be the first instruction to use nested multiclasses
this way? It seems like a good way to reduce duplication between
different integer widths. Let me know if it seems like an improvement.

Also, while I'm here, fix the return type of SETCC so we don't try to
merge a sign-extend with a SETCC.

Differential Revision: https://reviews.llvm.org/D79193
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp |   6 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td      |  31 ++--
 llvm/test/CodeGen/AArch64/sve-fcmp.ll           |  24 +++
 llvm/test/CodeGen/AArch64/sve-sext-zext.ll      | 188 ++++++++++++++++++++++++
 4 files changed, 234 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-sext-zext.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d5e549c..fa38ac6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -991,10 +991,12 @@ void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
   addTypeForNEON(VT, MVT::v4i32);
 }
 
-EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
-                                              EVT VT) const {
+EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
+                                              LLVMContext &C, EVT VT) const {
   if (!VT.isVector())
     return MVT::i32;
+  if (VT.isScalableVector())
+    return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
   return VT.changeVectorElementTypeToInteger();
 }
 
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 52d3c16..b7e86d2 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4077,20 +4077,25 @@ multiclass sve_int_dup_imm_pred_merge<string asm> {
                   (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, 0, 0), 0>;
 }
 
-multiclass sve_int_dup_imm_pred_zero<string asm> {
-  def _B : sve_int_dup_imm_pred<0b00, 0, asm, ZPR8,  "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>;
-  def _H : sve_int_dup_imm_pred<0b01, 0, asm, ZPR16, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>;
-  def _S : sve_int_dup_imm_pred<0b10, 0, asm, ZPR32, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>;
-  def _D : sve_int_dup_imm_pred<0b11, 0, asm, ZPR64, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>;
-
-  def : InstAlias<"mov $Zd, $Pg/z, $imm",
-                  (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>;
-  def : InstAlias<"mov $Zd, $Pg/z, $imm",
-                  (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>;
+multiclass sve_int_dup_imm_pred_zero_inst<
+    bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty,
+    ValueType predty, imm8_opt_lsl cpyimm> {
+  def NAME : sve_int_dup_imm_pred<sz8_64, 0, asm, zprty, "/z", (ins PPRAny:$Pg, cpyimm:$imm)>;
   def : InstAlias<"mov $Zd, $Pg/z, $imm",
-                  (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>;
-  def : InstAlias<"mov $Zd, $Pg/z, $imm",
-                  (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>;
+                  (!cast<Instruction>(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>;
+  def : Pat<(intty (zext (predty PPRAny:$Ps1))),
+            (!cast<Instruction>(NAME) PPRAny:$Ps1, 1, 0)>;
+  def : Pat<(intty (sext (predty PPRAny:$Ps1))),
+            (!cast<Instruction>(NAME) PPRAny:$Ps1, -1, 0)>;
+  def : Pat<(intty (anyext (predty PPRAny:$Ps1))),
+            (!cast<Instruction>(NAME) PPRAny:$Ps1, 1, 0)>;
+}
+
+multiclass sve_int_dup_imm_pred_zero<string asm> {
+  defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8,  nxv16i8, nxv16i1, cpy_imm8_opt_lsl_i8>;
+  defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1, cpy_imm8_opt_lsl_i16>;
+  defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1, cpy_imm8_opt_lsl_i32>;
+  defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1, cpy_imm8_opt_lsl_i64>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
index f1426bb..cbafae60 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -229,3 +229,27 @@ define <vscale x 8 x i1> @ueq_8f16(<vscale x 8 x half> %x, <vscale x 8 x half>
   %y = fcmp ueq <vscale x 8 x half> %x, %x2
   ret <vscale x 8 x i1> %y
 }
+
+define <vscale x 4 x i32> @oeq_4f32_sext(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: oeq_4f32_sext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 4 x float> %x, %x2
+  %r = sext <vscale x 4 x i1> %y to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @oeq_4f32_zext(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: oeq_4f32_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 4 x float> %x, %x2
+  %r = zext <vscale x 4 x i1> %y to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %r
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
new file mode 100644
index 0000000..f9a527c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
@@ -0,0 +1,188 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @sext_i1_i8(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: sext_i1_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 16 x i1> %a to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %r
+}
+
+define <vscale x 8 x i16> @sext_i1_i16(<vscale x 8 x i1> %a) {
+; CHECK-LABEL: sext_i1_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 8 x i1> %a to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %r
+}
+
+define <vscale x 4 x i32> @sext_i1_i32(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: sext_i1_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 4 x i1> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 2 x i64> @sext_i1_i64(<vscale x 2 x i1> %a) {
+; CHECK-LABEL: sext_i1_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 2 x i1> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 16 x i8> @zext_i1_i8(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: zext_i1_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 16 x i1> %a to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %r
+}
+
+define <vscale x 8 x i16> @zext_i1_i16(<vscale x 8 x i1> %a) {
+; CHECK-LABEL: zext_i1_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 8 x i1> %a to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %r
+}
+
+define <vscale x 4 x i32> @zext_i1_i32(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: zext_i1_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 4 x i1> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 2 x i64> @zext_i1_i64(<vscale x 2 x i1> %a) {
+; CHECK-LABEL: zext_i1_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 2 x i1> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 8 x i16> @sext_i8_i16(<vscale x 8 x i8> %a) {
+; CHECK-LABEL: sext_i8_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %r
+}
+
+define <vscale x 4 x i32> @sext_i8_i32(<vscale x 4 x i8> %a) {
+; CHECK-LABEL: sext_i8_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 4 x i8> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 2 x i64> @sext_i8_i64(<vscale x 2 x i8> %a) {
+; CHECK-LABEL: sext_i8_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtb z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 8 x i16> @zext_i8_i16(<vscale x 8 x i8> %a) {
+; CHECK-LABEL: zext_i8_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %r
+}
+
+define <vscale x 4 x i32> @zext_i8_i32(<vscale x 4 x i8> %a) {
+; CHECK-LABEL: zext_i8_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0xff
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 2 x i64> @zext_i8_i64(<vscale x 2 x i8> %a) {
+; CHECK-LABEL: zext_i8_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, #0xff
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 4 x i32> @sext_i16_i32(<vscale x 4 x i16> %a) {
+; CHECK-LABEL: sext_i16_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 2 x i64> @sext_i16_i64(<vscale x 2 x i16> %a) {
+; CHECK-LABEL: sext_i16_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 2 x i16> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 4 x i32> @zext_i16_i32(<vscale x 4 x i16> %a) {
+; CHECK-LABEL: zext_i16_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 2 x i64> @zext_i16_i64(<vscale x 2 x i16> %a) {
+; CHECK-LABEL: zext_i16_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, #0xffff
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 2 x i16> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 2 x i64> @sext_i32_i64(<vscale x 2 x i32> %a) {
+; CHECK-LABEL: sext_i32_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %r = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %r
+}
+
+define <vscale x 2 x i64> @zext_i32_i64(<vscale x 2 x i32> %a) {
+; CHECK-LABEL: zext_i32_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT:    ret
+  %r = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %r
+}
-- 
2.7.4
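
P.S. For anyone reading the nested multiclasses above for the first time, here is a
rough sketch of what one instantiation expands to. It assumes the existing consumer
of this multiclass in AArch64SVEInstrInfo.td, defm CPY_ZPzI : sve_int_dup_imm_pred_zero<"cpy">;
the record names below are illustrative, not verbatim tblgen output:

// Informal expansion sketch -- illustrative only, not actual tblgen output.
// The outer multiclass instantiates the inner one once per element size:
//
//   defm _S : sve_int_dup_imm_pred_zero_inst<0b10, "cpy", ZPR32, nxv4i32,
//                                            nxv4i1, cpy_imm8_opt_lsl_i32>;
//
// Because the inner multiclass uses `def NAME`, the defm prefixes concatenate,
// producing one instruction per width (e.g. CPY_ZPzI_S) plus three anonymous
// selection patterns, roughly:
//
//   (nxv4i32 (sext   nxv4i1:$Ps1)) -> (CPY_ZPzI_S $Ps1, -1, 0)  // all-ones lanes
//   (nxv4i32 (zext   nxv4i1:$Ps1)) -> (CPY_ZPzI_S $Ps1,  1, 0)  // 0-or-1 lanes
//   (nxv4i32 (anyext nxv4i1:$Ps1)) -> (CPY_ZPzI_S $Ps1,  1, 0)
//
// This is why the tests check for "mov z0.s, p0/z, #-1" (sext) and
// "mov z0.s, p0/z, #1" (zext): per the InstAlias above, "mov" is the preferred
// disassembly of the zeroing-predicated "cpy" immediate form.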