Since we already match shl (v, splat 1) to vadd, we can also expand it to a widening add.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D153112
defm : VPatWidenBinarySDNode_VV_VX_WV_WX<sub, zext_oneuse, "PseudoVWSUBU">;
defm : VPatWidenBinarySDNode_VV_VX_WV_WX<sub, anyext_oneuse, "PseudoVWSUBU">;
+// shl (ext v, splat 1) is a special case of widening add: x << 1 == x + x,
+// so (shl (ext x), splat 1) can be selected as a widening add of x with itself.
+foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ // Signed: shl (sext x), splat 1 -> vwadd.vv x, x. The _oneuse forms
+ // only fire when this shl is the extend's sole user, so the extend
+ // is not duplicated for other consumers.
+ def : Pat<(shl (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
+ (!cast<Instruction>("PseudoVWADD_VV_"#vti.LMul.MX)
+ vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ // Unsigned: shl (zext x), splat 1 -> vwaddu.vv x, x.
+ def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
+ (!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX)
+ vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ // anyext: the widened high bits are unspecified, so the unsigned
+ // widening add is also used here.
+ def : Pat<(shl (wti.Vector (anyext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
+ (!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX)
+ vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ }
+}
+
// 11.3. Vector Integer Extension
defm : VPatExtendSDNode_V<[zext, anyext], "PseudoVZEXT", "VF2",
AllFractionableVF2IntVectors>;
define <vscale x 8 x i16> @mgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vadd.vv v12, v12, v12
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, mu
+; RV32-NEXT: vwadd.vv v12, v8, v8
; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
define <vscale x 8 x half> @mgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vadd.vv v12, v12, v12
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, mu
+; RV32-NEXT: vwadd.vv v12, v8, v8
; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v10
-; RV32-NEXT: vadd.vv v12, v12, v12
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; RV32-NEXT: vwadd.vv v12, v10, v10
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v10
-; RV32-NEXT: vadd.vv v12, v12, v12
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; RV32-NEXT: vwadd.vv v12, v10, v10
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
define <vscale x 8 x i16> @vpgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vadd.vv v12, v12, v12
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwadd.vv v12, v8, v8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
define <vscale x 8 x half> @vpgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vadd.vv v12, v12, v12
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwadd.vv v12, v8, v8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v10
-; RV32-NEXT: vadd.vv v12, v12, v12
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwadd.vv v12, v10, v10
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v10
-; RV32-NEXT: vadd.vv v12, v12, v12
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwadd.vv v12, v10, v10
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret