From: Craig Topper
Date: Sat, 1 Oct 2022 23:31:23 +0000 (-0700)
Subject: [RISCV] Use _TIED form of VWADD(U)_WX/VWSUB(U)_WX to avoid early clobber.
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5bbc5eb55f0fbb508aeeab8d86a5051f5ac1849f;p=platform%2Fupstream%2Fllvm.git

[RISCV] Use _TIED form of VWADD(U)_WX/VWSUB(U)_WX to avoid early clobber.

One of the sources is the same size as the destination, so that source has no
overlap constraint with the destination register. By using the _TIED form we
avoid an early-clobber constraint for that source. This matches what was
already done for intrinsics. ConvertToThreeAddress will fix it if it can't
stay tied.
---
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b7b2564..067421f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -362,8 +362,9 @@ multiclass VPatWidenBinarySDNode_WV_WX(instruction_name#"_WV_"#vti.LMul.MX)
-              wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+              (!cast(instruction_name#"_WV_"#vti.LMul.MX#"_TIED")
+              wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW,
+              TAIL_AGNOSTIC)>;
 def : Pat<(op (wti.Vector wti.RegClass:$rs2),
               (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1))))),
           (!cast(instruction_name#"_WX_"#vti.LMul.MX)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 97defa9..dc8a1bb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -374,6 +374,27 @@ multiclass VPatBinaryVL_V;
 }
+multiclass VPatTiedBinaryNoMaskVL_V {
+  def : Pat<(result_type (vop
+                (result_type result_reg_class:$rs1),
+                (op2_type op2_reg_class:$rs2),
+                srcvalue,
+                true_mask,
+                VLOpFrag)),
+            (!cast(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
+                result_reg_class:$rs1,
+                op2_reg_class:$rs2,
+                GPR:$vl, sew, TAIL_AGNOSTIC)>;
+}
+
 multiclass VPatBinaryVL_XI;
 defm : VPatBinaryVL_V @vwadd_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vlse16.v v10, (a1), zero
-; CHECK-NEXT: vwadd.wv v8, v10, v9
+; CHECK-NEXT: vlse16.v v8, (a1), zero
+; CHECK-NEXT: vwadd.wv v8, v8, v9
 ; CHECK-NEXT: ret
 %a = load <8 x i8>, <8 x i8>* %x
 %b = load i16, i16* %y
@@ -720,8 +720,8 @@ define <4 x i32> @vwadd_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vlse32.v v10, (a1), zero
-; CHECK-NEXT: vwadd.wv v8, v10, v9
+; CHECK-NEXT: vlse32.v v8, (a1), zero
+; CHECK-NEXT: vwadd.wv v8, v8, v9
 ; CHECK-NEXT: ret
 %a = load <4 x i16>, <4 x i16>* %x
 %b = load i32, i32* %y
@@ -743,8 +743,8 @@ define <2 x i64> @vwadd_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
 ; RV32-NEXT: sw a1, 8(sp)
 ; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v8, v9
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -776,8 +776,8 @@ define <2 x i64> @vwadd_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
 ; RV32-NEXT: sw a1, 8(sp)
 ; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v8, v9
 ; RV32-NEXT: addi sp, sp, 16
 ;
RV32-NEXT: ret ; @@ -809,8 +809,8 @@ define <2 x i64> @vwadd_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind { ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwadd.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwadd.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -842,8 +842,8 @@ define <2 x i64> @vwadd_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind { ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwadd.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwadd.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -851,8 +851,8 @@ define <2 x i64> @vwadd_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vlse64.v v10, (a1), zero -; RV64-NEXT: vwadd.wv v8, v10, v9 +; RV64-NEXT: vlse64.v v8, (a1), zero +; RV64-NEXT: vwadd.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i64, i64* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll index 7b94083..fda9bd8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll @@ -667,8 +667,8 @@ define <8 x i16> @vwaddu_vx_v8i16_i16(<8 x i8>* %x, i16* %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vlse16.v v10, (a1), zero -; CHECK-NEXT: vwaddu.wv v8, v10, v9 +; CHECK-NEXT: vlse16.v v8, (a1), zero +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <8 x i8>, <8 x i8>* %x %b = load i16, i16* %y @@ -720,8 +720,8 @@ define <4 x i32> @vwaddu_vx_v4i32_i32(<4 x i16>* %x, i32* %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vlse32.v v10, (a1), zero -; CHECK-NEXT: vwaddu.wv v8, v10, v9 +; CHECK-NEXT: vlse32.v v8, (a1), zero +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, <4 x i16>* %x %b = load i32, i32* %y @@ -742,8 +742,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind { ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwaddu.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwaddu.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -774,8 +774,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind { ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwaddu.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwaddu.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -806,8 +806,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind { ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwaddu.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwaddu.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -839,8 +839,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind { ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, 
(a0), zero -; RV32-NEXT: vwaddu.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwaddu.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -848,8 +848,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vlse64.v v10, (a1), zero -; RV64-NEXT: vwaddu.wv v8, v10, v9 +; RV64-NEXT: vlse64.v v8, (a1), zero +; RV64-NEXT: vwaddu.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i64, i64* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll index 8ed07f0..4704a32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll @@ -650,9 +650,9 @@ define <8 x i16> @vwsub_vx_v8i16_i8(<8 x i8>* %x, i8* %y) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: lb a1, 0(a1) ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vwsub.wv v8, v10, v9 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <8 x i8>, <8 x i8>* %x %b = load i8, i8* %y @@ -669,8 +669,8 @@ define <8 x i16> @vwsub_vx_v8i16_i16(<8 x i8>* %x, i16* %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vlse16.v v10, (a1), zero -; CHECK-NEXT: vwsub.wv v8, v10, v9 +; CHECK-NEXT: vlse16.v v8, (a1), zero +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <8 x i8>, <8 x i8>* %x %b = load i16, i16* %y @@ -687,9 +687,9 @@ define <4 x i32> @vwsub_vx_v4i32_i8(<4 x i16>* %x, i8* %y) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: lb a1, 0(a1) ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vwsub.wv v8, v10, v9 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, <4 x i16>* %x %b = load i8, i8* %y @@ -707,9 +707,9 @@ define <4 x i32> @vwsub_vx_v4i32_i16(<4 x i16>* %x, i16* %y) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: lh a1, 0(a1) ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vwsub.wv v8, v10, v9 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, <4 x i16>* %x %b = load i16, i16* %y @@ -726,8 +726,8 @@ define <4 x i32> @vwsub_vx_v4i32_i32(<4 x i16>* %x, i32* %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vlse32.v v10, (a1), zero -; CHECK-NEXT: vwsub.wv v8, v10, v9 +; CHECK-NEXT: vlse32.v v8, (a1), zero +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, <4 x i16>* %x %b = load i32, i32* %y @@ -749,8 +749,8 @@ define <2 x i64> @vwsub_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind { ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwsub.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwsub.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -759,9 +759,9 @@ define <2 x i64> @vwsub_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: lb a1, 0(a1) ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: 
vmv.v.x v10, a1 +; RV64-NEXT: vmv.v.x v8, a1 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vwsub.wv v8, v10, v9 +; RV64-NEXT: vwsub.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i8, i8* %y @@ -784,8 +784,8 @@ define <2 x i64> @vwsub_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind { ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwsub.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwsub.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -794,9 +794,9 @@ define <2 x i64> @vwsub_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: lh a1, 0(a1) ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: vmv.v.x v8, a1 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vwsub.wv v8, v10, v9 +; RV64-NEXT: vwsub.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i16, i16* %y @@ -819,8 +819,8 @@ define <2 x i64> @vwsub_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind { ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwsub.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwsub.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -829,9 +829,9 @@ define <2 x i64> @vwsub_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: lw a1, 0(a1) ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: vmv.v.x v8, a1 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vwsub.wv v8, v10, v9 +; RV64-NEXT: vwsub.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i32, i32* %y @@ -854,8 +854,8 @@ define <2 x i64> @vwsub_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind { ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwsub.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwsub.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -863,8 +863,8 @@ define <2 x i64> @vwsub_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vlse64.v v10, (a1), zero -; RV64-NEXT: vwsub.wv v8, v10, v9 +; RV64-NEXT: vlse64.v v8, (a1), zero +; RV64-NEXT: vwsub.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i64, i64* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll index 2e0ba48..258fbbe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll @@ -650,9 +650,9 @@ define <8 x i16> @vwsubu_vx_v8i16_i8(<8 x i8>* %x, i8* %y) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: lbu a1, 0(a1) ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vwsubu.wv v8, v10, v9 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <8 x i8>, <8 x i8>* %x %b = load i8, i8* %y @@ -669,8 +669,8 @@ define <8 x i16> @vwsubu_vx_v8i16_i16(<8 x i8>* %x, i16* %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vlse16.v 
v10, (a1), zero -; CHECK-NEXT: vwsubu.wv v8, v10, v9 +; CHECK-NEXT: vlse16.v v8, (a1), zero +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <8 x i8>, <8 x i8>* %x %b = load i16, i16* %y @@ -687,9 +687,9 @@ define <4 x i32> @vwsubu_vx_v4i32_i8(<4 x i16>* %x, i8* %y) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: lbu a1, 0(a1) ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vwsubu.wv v8, v10, v9 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, <4 x i16>* %x %b = load i8, i8* %y @@ -707,9 +707,9 @@ define <4 x i32> @vwsubu_vx_v4i32_i16(<4 x i16>* %x, i16* %y) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: lhu a1, 0(a1) ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vwsubu.wv v8, v10, v9 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, <4 x i16>* %x %b = load i16, i16* %y @@ -726,8 +726,8 @@ define <4 x i32> @vwsubu_vx_v4i32_i32(<4 x i16>* %x, i32* %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vlse32.v v10, (a1), zero -; CHECK-NEXT: vwsubu.wv v8, v10, v9 +; CHECK-NEXT: vlse32.v v8, (a1), zero +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, <4 x i16>* %x %b = load i32, i32* %y @@ -748,8 +748,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind { ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwsubu.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwsubu.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -758,9 +758,9 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: vmv.v.x v8, a1 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vwsubu.wv v8, v10, v9 +; RV64-NEXT: vwsubu.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i8, i8* %y @@ -782,8 +782,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind { ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwsubu.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwsubu.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -792,9 +792,9 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: lhu a1, 0(a1) ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: vmv.v.x v8, a1 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vwsubu.wv v8, v10, v9 +; RV64-NEXT: vwsubu.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i16, i16* %y @@ -816,8 +816,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind { ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwsubu.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwsubu.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -826,9 +826,9 @@ define 
<2 x i64> @vwsubu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: lwu a1, 0(a1) ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: vmv.v.x v8, a1 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vwsubu.wv v8, v10, v9 +; RV64-NEXT: vwsubu.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i32, i32* %y @@ -851,8 +851,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind { ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vwsubu.wv v8, v10, v9 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vwsubu.wv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -860,8 +860,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vlse64.v v10, (a1), zero -; RV64-NEXT: vwsubu.wv v8, v10, v9 +; RV64-NEXT: vlse64.v v8, (a1), zero +; RV64-NEXT: vwsubu.wv v8, v8, v9 ; RV64-NEXT: ret %a = load <2 x i32>, <2 x i32>* %x %b = load i64, i64* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll index f538817..5c23f98 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll @@ -62,8 +62,7 @@ define @vwadd_wv_nxv1i64( %va, %vb to %vd = add %va, %vc @@ -74,8 +73,7 @@ define @vwaddu_wv_nxv1i64( %va, %vb to %vd = add %va, %vc @@ -168,8 +166,7 @@ define @vwadd_wv_nxv2i64( %va, %vb to %vd = add %va, %vc @@ -180,8 +177,7 @@ define @vwaddu_wv_nxv2i64( %va, %vb to %vd = add %va, %vc @@ -274,8 +270,7 @@ define @vwadd_wv_nxv4i64( %va, %vb to %vd = add %va, %vc @@ -286,8 +281,7 @@ define @vwaddu_wv_nxv4i64( %va, %vb to %vd = add %va, %vc @@ -380,8 +374,7 @@ define @vwadd_wv_nxv8i64( %va, %vb to %vd = add %va, %vc @@ -392,8 +385,7 @@ define @vwaddu_wv_nxv8i64( %va, %vb to %vd = add %va, %vc diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll index d25a574..c5b19d32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll @@ -62,8 +62,7 @@ define @vwsub_wv_nxv1i64( %va, %vb to %vd = sub %va, %vc @@ -74,8 +73,7 @@ define @vwsubu_wv_nxv1i64( %va, %vb to %vd = sub %va, %vc @@ -168,8 +166,7 @@ define @vwsub_wv_nxv2i64( %va, %vb to %vd = sub %va, %vc @@ -180,8 +177,7 @@ define @vwsubu_wv_nxv2i64( %va, %vb to %vd = sub %va, %vc @@ -274,8 +270,7 @@ define @vwsub_wv_nxv4i64( %va, %vb to %vd = sub %va, %vc @@ -286,8 +281,7 @@ define @vwsubu_wv_nxv4i64( %va, %vb to %vd = sub %va, %vc @@ -380,8 +374,7 @@ define @vwsub_wv_nxv8i64( %va, %vb to %vd = sub %va, %vc @@ -392,8 +385,7 @@ define @vwsubu_wv_nxv8i64( %va, %vb to %vd = sub %va, %vc
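---
For reference, the IR shape the new _TIED patterns match is the plain widening
add/sub whose wide operand feeds the result directly, as in the
vwadd-sdnode.ll tests above. A minimal sketch follows; the function name is
illustrative, and the body mirrors the existing vwadd_wv_nxv1i64 test:

; With the _TIED pseudo the wide operand %va may be allocated to the same
; register as the result, so codegen such as "vwadd.wv v8, v8, v9" is allowed;
; the untied pseudo's early-clobber destination forced a separate register
; (the "vwadd.wv v8, v10, v9" sequences removed in the test diffs above).
define <vscale x 1 x i64> @vwadd_wv_tied_sketch(<vscale x 1 x i64> %va,
                                                <vscale x 1 x i32> %vb) {
  %vc = sext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
  %vd = add <vscale x 1 x i64> %va, %vc
  ret <vscale x 1 x i64> %vd
}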
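Staying tied is not required for correctness: as the commit message notes,
ConvertToThreeAddress will fix it if the tie cannot be kept. A hypothetical
case (function name and the extra use are added here for illustration; its
codegen is not part of this diff) is one where the wide operand stays live
past the widening add:

define <vscale x 1 x i64> @vwadd_wv_wide_op_reused(<vscale x 1 x i64> %va,
                                                   <vscale x 1 x i32> %vb) {
  %vc = sext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
  %vd = add <vscale x 1 x i64> %va, %vc
  ; %va is still needed below, so the widening add cannot simply overwrite it
  ; and the _TIED pseudo is expected to be untied again before allocation.
  %ve = add <vscale x 1 x i64> %vd, %va
  ret <vscale x 1 x i64> %ve
}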