From: Fraser Cormack
Date: Tue, 8 Jun 2021 13:55:31 +0000 (+0100)
Subject: [RISCV] Support CONCAT_VECTORS on scalable masks
X-Git-Tag: llvmorg-14-init~4475
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e8f1f891031385a34f0548803f3bc76ce50544c1;p=platform%2Fupstream%2Fllvm.git

[RISCV] Support CONCAT_VECTORS on scalable masks

This patch is a simple fix that registers CONCAT_VECTORS as
custom-lowered for scalable mask vectors. This follows the pattern of
all other scalable-vector types, as the default expansion of
CONCAT_VECTORS cannot handle scalable types, and even if it could, it
would go through the stack and generate worse code.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D103896
---
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ef748ce..8439fea 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -441,6 +441,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
       // Mask VTs are custom-expanded into a series of standard nodes
       setOperationAction(ISD::TRUNCATE, VT, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll
index d43d7c89..c4ee90b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; FIXME: The scalar/vector operations ('fv' tests) should swap operands and
@@ -2507,4 +2507,23 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64_nonans(<vscale x 8 x double> %va,
   ret <vscale x 8 x i1> %vc
 }
 
+; This fcmp/setcc is split and so we find a scalable-vector mask CONCAT_VECTOR
+; node. Ensure we correctly (custom) lower this.
+define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
+; CHECK-LABEL: fcmp_oeq_vf_nx16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fcvt.d.w ft0, zero
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmfeq.vf v25, v16, ft0
+; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vx v0, v25, a0
+; CHECK-NEXT:    ret
+  %vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer
+  ret <vscale x 16 x i1> %vc
+}
+
 attributes #0 = { "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll
index 60edaa1..6ddaa04 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; FIXME: The scalar/vector operations ('fv' tests) should swap operands and
@@ -2507,4 +2507,23 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64_nonans(<vscale x 8 x double> %va,
   ret <vscale x 8 x i1> %vc
 }
 
+; This fcmp/setcc is split and so we find a scalable-vector mask CONCAT_VECTOR
+; node. Ensure we correctly (custom) lower this.
+define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
+; CHECK-LABEL: fcmp_oeq_vf_nx16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.d.x ft0, zero
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmfeq.vf v25, v16, ft0
+; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vx v0, v25, a0
+; CHECK-NEXT:    ret
+  %vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer
+  ret <vscale x 16 x i1> %vc
+}
+
 attributes #0 = { "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll
index 75d6e28..bf7c9e2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 8 x i1> @icmp_eq_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: icmp_eq_vv_nxv8i8:
@@ -3087,3 +3087,20 @@ define <vscale x 8 x i1> @icmp_eq_ii_nxv8i8() {
   ret <vscale x 8 x i1> %vc
 }
 
+; This icmp/setcc is split and so we find a scalable-vector mask CONCAT_VECTOR
+; node. Ensure we correctly (custom) lower this.
+define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) {
+; CHECK-LABEL: icmp_eq_vi_nx16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmseq.vi v25, v16, 0
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vx v0, v25, a0
+; CHECK-NEXT:    ret
+  %vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer
+  ret <vscale x 16 x i1> %vc
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll
index 49ee566..199c07c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 8 x i1> @icmp_eq_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: icmp_eq_vv_nxv8i8:
@@ -2941,3 +2941,20 @@ define <vscale x 8 x i1> @icmp_sle_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
   ret <vscale x 8 x i1> %vc
 }
 
+; This icmp/setcc is split and so we find a scalable-vector mask CONCAT_VECTOR
+; node. Ensure we correctly (custom) lower this.
+define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) {
+; CHECK-LABEL: icmp_eq_vi_nx16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmseq.vi v25, v16, 0
+; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vx v0, v25, a0
+; CHECK-NEXT:    ret
+  %vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer
+  ret <vscale x 16 x i1> %vc
+}
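
For context beyond the patch: when CONCAT_VECTORS is marked Custom for a
scalable type, the RISC-V backend lowers the node as a chain of
INSERT_SUBVECTOR operations over an undef vector rather than expanding it
through the stack; those subvector inserts are roughly what become the
vslideup.vx (and surrounding vsetvli) sequences in the CHECK lines above.
The sketch below approximates that lowering pattern. It is not code taken
verbatim from this revision, and the helper name lowerConcatVectorsSketch
is invented here purely for illustration.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Approximate shape of the custom lowering for a scalable CONCAT_VECTORS:
// build the result by inserting each operand into an undef vector at its
// minimum element offset; the actual offset scales with vscale at runtime.
static SDValue lowerConcatVectorsSketch(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  unsigned NumOpElts =
      Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
  SDValue Vec = DAG.getUNDEF(VT);
  for (const auto &OpIdx : enumerate(Op->ops()))
    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
                      DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
  return Vec;
}

The new setOperationAction(ISD::CONCAT_VECTORS, VT, Custom) call routes
mask (i1) vector types down this same path, which is why the split
fcmp/icmp tests above can concatenate their two half-mask results with a
single vslideup.vx instead of a round trip through the stack.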