From 3b334978d565bf39c80b6b81e459bd33d204e2a3 Mon Sep 17 00:00:00 2001
From: Fraser Cormack
Date: Wed, 13 Jul 2022 14:21:48 +0100
Subject: [PATCH] [RISCV] Add a test showing a miscompilation with subreg
 liveness

This patch adds a test which shows that we may incorrectly register
allocate for RVV instructions which have no-overlap constraints on
source/dest registers of different LMUL groups.

The particular case shows that a vrgatherei16 instruction writes to a
LMUL=1 register group v11 and reads from an EMUL=2 register group
v10/v11. This breaks the overlap constraints of the vrgatherei16
instruction. The test also shows that disabling subregister liveness
fixes the test.

We use `early-clobber` on the `VR` dest and the `VRM2` source to enforce
the constraint but with subregister liveness this constraint is not met.
It's unclear to me at this point whether this is per-design of
early-clobber in conjunction with subregisters (meaning we should find
another way of expressing this constraint) or whether it's a bug in the
register allocator somewhere.

Reviewed By: rogfer01

Differential Revision: https://reviews.llvm.org/D129639
---
NOTE(review): the angle-bracketed scalable-vector types in this copy of
the patch had been stripped and are restored below. The types on the
intrinsic calls and declarations, on the load/and/store, and on the
parameters %vs12.i.i.i and %v37 follow from the mangled intrinsic names
(nxv8i16.nxv1i16 and nxv8i8.i64). The types of the unused parameters
%v15, %0 and %1 cannot be derived from the remaining text; they are
assumed to be <vscale x 1 x i16> -- TODO confirm against D129639.
The author's e-mail address is likewise missing from the From: header
and has not been reconstructed.

 .../RISCV/rvv/vrgatherei16-subreg-liveness.ll      | 73 ++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll
new file mode 100644
index 0000000..7877082
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s -riscv-enable-subreg-liveness=false | FileCheck %s --check-prefix NOSUBREG
+
+; This test checks that vrgatherei16 instructions are correctly
+; register-allocated. The LMUL=1 destination register groups may not overlap
+; with the EMUL=2 source vector register groups.
+
+; FIXME: enabling subregister liveness results in incorrect register
+; allocation!
+
+define internal void @foo(<vscale x 1 x i16> %v15, <vscale x 1 x i16> %0, <vscale x 1 x i16> %vs12.i.i.i, <vscale x 1 x i16> %1, <vscale x 8 x i8> %v37) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %loopIR.preheader.i.i
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmv.v.i v14, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vsetivli zero, 4, e8, m1, tu, mu
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vrgatherei16.vv v8, v9, v14
+; CHECK-NEXT:  .LBB0_1: # %loopIR3.i.i
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vl1r.v v9, (zero)
+; CHECK-NEXT:    vsetivli zero, 4, e8, m1, tu, mu
+; CHECK-NEXT:    vmv1r.v v11, v12
+; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10
+; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vand.vv v9, v8, v11
+; CHECK-NEXT:    vs1r.v v9, (zero)
+; CHECK-NEXT:    j .LBB0_1
+;
+; NOSUBREG-LABEL: foo:
+; NOSUBREG:       # %bb.0: # %loopIR.preheader.i.i
+; NOSUBREG-NEXT:    # kill: def $v10 killed $v10 def $v10m2
+; NOSUBREG-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; NOSUBREG-NEXT:    vmv.v.i v14, 0
+; NOSUBREG-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
+; NOSUBREG-NEXT:    vmv.v.i v9, 0
+; NOSUBREG-NEXT:    vsetivli zero, 4, e8, m1, tu, mu
+; NOSUBREG-NEXT:    vmv1r.v v8, v9
+; NOSUBREG-NEXT:    vrgatherei16.vv v8, v9, v14
+; NOSUBREG-NEXT:  .LBB0_1: # %loopIR3.i.i
+; NOSUBREG-NEXT:    # =>This Inner Loop Header: Depth=1
+; NOSUBREG-NEXT:    vl1r.v v9, (zero)
+; NOSUBREG-NEXT:    vsetivli zero, 4, e8, m1, tu, mu
+; NOSUBREG-NEXT:    vmv1r.v v13, v12
+; NOSUBREG-NEXT:    vrgatherei16.vv v13, v9, v10
+; NOSUBREG-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
+; NOSUBREG-NEXT:    vand.vv v9, v8, v13
+; NOSUBREG-NEXT:    vs1r.v v9, (zero)
+; NOSUBREG-NEXT:    j .LBB0_1
+loopIR.preheader.i.i:
+  %v18 = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv1i16(<vscale x 8 x i16> poison, <vscale x 1 x i16> %vs12.i.i.i, i64 0)
+  br label %loopIR3.i.i
+
+loopIR3.i.i:                                      ; preds = %loopIR3.i.i, %loopIR.preheader.i.i
+  %v376 = load <vscale x 8 x i8>, ptr addrspace(1) null, align 8
+  %v38 = tail call <vscale x 8 x i8> @llvm.riscv.vrgatherei16.vv.nxv8i8.i64(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i64 4)
+  %v40 = tail call <vscale x 8 x i8> @llvm.riscv.vrgatherei16.vv.nxv8i8.i64(<vscale x 8 x i8> %v37, <vscale x 8 x i8> %v376, <vscale x 8 x i16> %v18, i64 4)
+  %v42 = and <vscale x 8 x i8> %v38, %v40
+  store <vscale x 8 x i8> %v42, ptr addrspace(1) null, align 4
+  br label %loopIR3.i.i
+}
+
+; Function Attrs: nocallback nofree nosync nounwind readnone willreturn
+declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv1i16(<vscale x 8 x i16>, <vscale x 1 x i16>, i64 immarg) #0
+
+; Function Attrs: nounwind readnone
+declare <vscale x 8 x i8> @llvm.riscv.vrgatherei16.vv.nxv8i8.i64(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i16>, i64) #1
+
+attributes #0 = { nocallback nofree nosync nounwind readnone willreturn }
+attributes #1 = { nounwind readnone }
-- 
2.7.4