From 10879c26a20307abb3f0d29da6504f6cc9cc41e3 Mon Sep 17 00:00:00 2001
From: Fraser Cormack
Date: Fri, 28 Jan 2022 12:14:12 +0000
Subject: [PATCH] [RISCV] Add tests for possible splat optimizations

These splats -- whether BUILD_VECTOR or SPLAT_VECTOR -- are formed by
first extracting a value from a vector and splatting it to all elements
of the destination vector. These could be performed more optimally,
avoiding the drop to scalar, using RVV's vrgather, for example.
---
NOTE(review): this copy of the patch was reconstructed from a
whitespace-collapsed, tag-stripped extraction. Line breaks were restored and
the <vscale x N x T> types in splat-vectors.ll were re-derived from the
function names, the scalar operand types and the SEW/LMUL in the CHECK lines.
Two spans could not be recovered and must be taken from the original commit
before applying: the author e-mail in the "From:" header, and the constant
vector operand of the `store <8 x i16>` context line in
fixed-vectors-int-buildvec.ll.

 .../CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll |  81 ++++++++++++++
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll        |  56 ++++++++++
 llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll       | 123 +++++++++++++++++++++
 3 files changed, 260 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index a7cf523..3d19539 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -200,3 +200,84 @@ define void @buildvec_merge0_v4f32(<4 x float>* %x, float %f) {
   store <4 x float> %v3, <4 x float>* %x
   ret void
 }
+
+define <4 x half> @splat_c3_v4f16(<4 x half> %v) {
+; CHECK-LABEL: splat_c3_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vrgather.vi v9, v8, 3
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %x = extractelement <4 x half> %v, i32 3
+  %ins = insertelement <4 x half> poison, half %x, i32 0
+  %splat = shufflevector <4 x half> %ins, <4 x half> poison, <4 x i32> zeroinitializer
+  ret <4 x half> %splat
+}
+
+define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
+; CHECK-LABEL: splat_idx_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    ret
+  %x = extractelement <4 x half> %v, i64 %idx
+  %ins = insertelement <4 x half> poison, half %x, i32 0
+  %splat = shufflevector <4 x half> %ins, <4 x half> poison, <4 x i32> zeroinitializer
+  ret <4 x half> %splat
+}
+
+define <8 x float> @splat_c5_v8f32(<8 x float> %v) {
+; LMULMAX1-LABEL: splat_c5_v8f32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; LMULMAX1-NEXT:    vrgather.vi v8, v9, 1
+; LMULMAX1-NEXT:    vmv.v.v v9, v8
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX2-LABEL: splat_c5_v8f32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; LMULMAX2-NEXT:    vrgather.vi v10, v8, 5
+; LMULMAX2-NEXT:    vmv.v.v v8, v10
+; LMULMAX2-NEXT:    ret
+  %x = extractelement <8 x float> %v, i32 5
+  %ins = insertelement <8 x float> poison, float %x, i32 0
+  %splat = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> zeroinitializer
+  ret <8 x float> %splat
+}
+
+define <8 x float> @splat_idx_v8f32(<8 x float> %v, i64 %idx) {
+; LMULMAX1-LABEL: splat_idx_v8f32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi sp, sp, -48
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 48
+; LMULMAX1-NEXT:    andi a0, a0, 7
+; LMULMAX1-NEXT:    slli a0, a0, 2
+; LMULMAX1-NEXT:    addi a1, sp, 16
+; LMULMAX1-NEXT:    add a0, a1, a0
+; LMULMAX1-NEXT:    addi a1, sp, 32
+; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; LMULMAX1-NEXT:    vse32.v v9, (a1)
+; LMULMAX1-NEXT:    addi a1, sp, 16
+; LMULMAX1-NEXT:    vse32.v v8, (a1)
+; LMULMAX1-NEXT:    vlse32.v v8, (a0), zero
+; LMULMAX1-NEXT:    vmv.v.v v9, v8
+; LMULMAX1-NEXT:    addi sp, sp, 48
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX2-LABEL: splat_idx_v8f32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; LMULMAX2-NEXT:    vslidedown.vx v8, v8, a0
+; LMULMAX2-NEXT:    vfmv.f.s ft0, v8
+; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; LMULMAX2-NEXT:    vfmv.v.f v8, ft0
+; LMULMAX2-NEXT:    ret
+  %x = extractelement <8 x float> %v, i64 %idx
+  %ins = insertelement <8 x float> poison, float %x, i32 0
+  %splat = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> zeroinitializer
+  ret <8 x float> %splat
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index f4bb614..80c4ad8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -648,3 +648,59 @@ define void @buildvec_vid_shl_imm_v8i16(<8 x i16>* %x) {
   store <8 x i16> , <8 x i16>* %x
   ret void
 }
+
+define <4 x i32> @splat_c3_v4i32(<4 x i32> %v) {
+; CHECK-LABEL: splat_c3_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vrgather.vi v9, v8, 3
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %x = extractelement <4 x i32> %v, i32 3
+  %ins = insertelement <4 x i32> poison, i32 %x, i32 0
+  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat
+}
+
+define <4 x i32> @splat_idx_v4i32(<4 x i32> %v, i64 %idx) {
+; CHECK-LABEL: splat_idx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    ret
+  %x = extractelement <4 x i32> %v, i64 %idx
+  %ins = insertelement <4 x i32> poison, i32 %x, i32 0
+  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat
+}
+
+define <8 x i16> @splat_c4_v8i16(<8 x i16> %v) {
+; CHECK-LABEL: splat_c4_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT:    vrgather.vi v9, v8, 4
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %x = extractelement <8 x i16> %v, i32 4
+  %ins = insertelement <8 x i16> poison, i16 %x, i32 0
+  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %splat
+}
+
+define <8 x i16> @splat_idx_v8i16(<8 x i16> %v, i64 %idx) {
+; CHECK-LABEL: splat_idx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    ret
+  %x = extractelement <8 x i16> %v, i64 %idx
+  %ins = insertelement <8 x i16> poison, i16 %x, i32 0
+  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %splat
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll
new file mode 100644
index 0000000..c68e310
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 4 x i32> @splat_c3_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: splat_c3_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vi v8, v8, 3
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    ret
+  %x = extractelement <vscale x 4 x i32> %v, i32 3
+  %ins = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %splat
+}
+
+define <vscale x 4 x i32> @splat_idx_nxv4i32(<vscale x 4 x i32> %v, i64 %idx) {
+; CHECK-LABEL: splat_idx_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    ret
+  %x = extractelement <vscale x 4 x i32> %v, i64 %idx
+  %ins = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %splat
+}
+
+define <vscale x 8 x i16> @splat_c4_nxv8i16(<vscale x 8 x i16> %v) {
+; CHECK-LABEL: splat_c4_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vi v8, v8, 4
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    ret
+  %x = extractelement <vscale x 8 x i16> %v, i32 4
+  %ins = insertelement <vscale x 8 x i16> poison, i16 %x, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i16> %splat
+}
+
+define <vscale x 8 x i16> @splat_idx_nxv8i16(<vscale x 8 x i16> %v, i64 %idx) {
+; CHECK-LABEL: splat_idx_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    ret
+  %x = extractelement <vscale x 8 x i16> %v, i64 %idx
+  %ins = insertelement <vscale x 8 x i16> poison, i16 %x, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i16> %splat
+}
+
+define <vscale x 2 x half> @splat_c1_nxv2f16(<vscale x 2 x half> %v) {
+; CHECK-LABEL: splat_c1_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    ret
+  %x = extractelement <vscale x 2 x half> %v, i32 1
+  %ins = insertelement <vscale x 2 x half> poison, half %x, i32 0
+  %splat = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x half> %splat
+}
+
+define <vscale x 2 x half> @splat_idx_nxv2f16(<vscale x 2 x half> %v, i64 %idx) {
+; CHECK-LABEL: splat_idx_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    ret
+  %x = extractelement <vscale x 2 x half> %v, i64 %idx
+  %ins = insertelement <vscale x 2 x half> poison, half %x, i32 0
+  %splat = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x half> %splat
+}
+
+define <vscale x 4 x float> @splat_c3_nxv4f32(<vscale x 4 x float> %v) {
+; CHECK-LABEL: splat_c3_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vi v8, v8, 3
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    ret
+  %x = extractelement <vscale x 4 x float> %v, i64 3
+  %ins = insertelement <vscale x 4 x float> poison, float %x, i32 0
+  %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x float> %splat
+}
+
+define <vscale x 4 x float> @splat_idx_nxv4f32(<vscale x 4 x float> %v, i64 %idx) {
+; CHECK-LABEL: splat_idx_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    ret
+  %x = extractelement <vscale x 4 x float> %v, i64 %idx
+  %ins = insertelement <vscale x 4 x float> poison, float %x, i32 0
+  %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x float> %splat
+}
-- 
2.7.4