bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
if (!IsSelect) {
- bool IsLHS = MaskIndex < (int)NumElts;
- // For "undef" elements of -1, shuffle in element 0 instead.
- GatherIndicesLHS.push_back(
- DAG.getConstant(IsLHS ? std::max(MaskIndex, 0) : 0, DL, XLenVT));
- // TODO: If we're masking out unused elements anyway, it might produce
- // better code if we use the most-common element index instead of 0.
+ bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
+ GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
+ ? DAG.getConstant(MaskIndex, DL, XLenVT)
+ : DAG.getUNDEF(XLenVT));
GatherIndicesRHS.push_back(
- DAG.getConstant(IsLHS ? 0 : MaskIndex - NumElts, DL, XLenVT));
+ IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
+ : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
}
}
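The hunk above stops forcing index 0 for lanes a gather does not supply (lanes owned by the other source operand, or undef lanes in the original mask) and emits undef indices instead, so later combines and the masked vrgather can ignore those lanes. A minimal standalone sketch of that index-splitting idea, using std::optional<int> in place of SelectionDAG undef nodes; the helper name splitShuffleMask and the simplified representation are illustrative only, not LLVM's API:

#include <cstddef>
#include <cstdio>
#include <optional>
#include <string>
#include <utility>
#include <vector>

// A gather index per result lane; std::nullopt stands in for an undef index.
using GatherIndices = std::vector<std::optional<int>>;

// Split a two-source shuffle mask (-1 = undef lane) into per-operand gather
// indices, leaving lanes an operand does not supply as undef instead of 0.
static std::pair<GatherIndices, GatherIndices>
splitShuffleMask(const std::vector<int> &Mask, int NumElts) {
  GatherIndices LHS, RHS;
  for (int MaskIndex : Mask) {
    bool IsLHSOrUndefIndex = MaskIndex < NumElts;
    LHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
                      ? std::optional<int>(MaskIndex)
                      : std::nullopt);
    RHS.push_back(IsLHSOrUndefIndex
                      ? std::nullopt
                      : std::optional<int>(MaskIndex - NumElts));
  }
  return {LHS, RHS};
}

int main() {
  // Interleave-style mask over two 8-element sources (-1 marks an undef lane).
  std::vector<int> Mask = {0, 8, 1, 9, 2, 10, 3, -1};
  auto [LHS, RHS] = splitShuffleMask(Mask, 8);
  for (std::size_t I = 0; I < Mask.size(); ++I) {
    std::string L = LHS[I] ? std::to_string(*LHS[I]) : "undef";
    std::string R = RHS[I] ? std::to_string(*RHS[I]) : "undef";
    std::printf("lane %zu: lhs=%s rhs=%s\n", I, L.c_str(), R.c_str());
  }
  return 0;
}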
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+target triple = "riscv64-unknown-unknown-elf"
+
+define dso_local <16 x i16> @interleave(<8 x i16> %v0, <8 x i16> %v1) {
+; CHECK-LABEL: interleave:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m2 def $v8m2
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, mu
+; CHECK-NEXT: vmv2r.v v28, v26
+; CHECK-NEXT: vslideup.vi v28, v8, 0
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v30, 0
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, tu, mu
+; CHECK-NEXT: vslideup.vi v28, v30, 8
+; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0)
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vrgather.vv v12, v28, v10
+; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, mu
+; CHECK-NEXT: vslideup.vi v26, v8, 0
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, tu, mu
+; CHECK-NEXT: vslideup.vi v26, v30, 8
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT: vid.v v28
+; CHECK-NEXT: vrgather.vv v8, v12, v28
+; CHECK-NEXT: lui a0, 11
+; CHECK-NEXT: addiw a0, a0, -1366
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_1)
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v28, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
+; CHECK-NEXT: vrgather.vv v8, v26, v28, v0.t
+; CHECK-NEXT: ret
+entry:
+ %v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
+ %v3 = shufflevector <8 x i16> %v1, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %v4 = shufflevector <16 x i16> %v2, <16 x i16> %v3, <16 x i32> <i32 0, i32 16, i32 2, i32 17, i32 4, i32 18, i32 6, i32 19, i32 8, i32 20, i32 10, i32 21, i32 12, i32 22, i32 14, i32 23>
+ ret <16 x i16> %v4
+}
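For reference, the new test builds the interleave from two widening shuffles plus one final two-source shuffle: even result lanes take mask entries 0, 2, ..., 14 (which resolve to %v0), and odd result lanes take entries 16, 17, ..., 23 (which resolve to %v1), i.e. result[2*i] = %v0[i] and result[2*i+1] = %v1[i]. A small scalar model of that composition, with arbitrary sample values and C++ used purely for illustration:

#include <array>
#include <cstdio>

int main() {
  // Scalar model of @interleave: result[2*i] = v0[i], result[2*i+1] = v1[i].
  std::array<int, 8> V0{0, 1, 2, 3, 4, 5, 6, 7};
  std::array<int, 8> V1{10, 11, 12, 13, 14, 15, 16, 17};
  std::array<int, 16> Result{};
  for (int I = 0; I < 8; ++I) {
    Result[2 * I] = V0[I];     // even lanes: final mask entries 0, 2, ..., 14
    Result[2 * I + 1] = V1[I]; // odd lanes: final mask entries 16, ..., 23
  }
  for (int X : Result)
    std::printf("%d ", X);
  std::printf("\n");
  return 0;
}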
; LMULMAX2-NEXT: addi a0, zero, 2
; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; LMULMAX2-NEXT: vmv.s.x v0, a0
-; LMULMAX2-NEXT: lui a0, %hi(.LCPI1_0)
-; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI1_0)
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-NEXT: vle32.v v27, (a0)
+; LMULMAX2-NEXT: vmv.v.i v27, 3
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m1, tu, mu
; LMULMAX2-NEXT: vrgather.vv v26, v9, v27, v0.t
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; LMULMAX2-NEXT: vrgather.vv v27, v10, v25
+; LMULMAX2-NEXT: vrgather.vv v28, v10, v25
; LMULMAX2-NEXT: addi a0, zero, 8
; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; LMULMAX2-NEXT: vmv.s.x v0, a0
-; LMULMAX2-NEXT: lui a0, %hi(.LCPI1_1)
-; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI1_1)
-; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-NEXT: vle32.v v25, (a0)
-; LMULMAX2-NEXT: vsetvli zero, zero, e32, m1, tu, mu
-; LMULMAX2-NEXT: vrgather.vv v27, v11, v25, v0.t
+; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, tu, mu
+; LMULMAX2-NEXT: vrgather.vv v28, v11, v27, v0.t
; LMULMAX2-NEXT: addi a0, zero, 3
; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; LMULMAX2-NEXT: vmv.s.x v0, a0
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX2-NEXT: vmerge.vvm v8, v27, v26, v0
+; LMULMAX2-NEXT: vmerge.vvm v8, v28, v26, v0
; LMULMAX2-NEXT: ret
%z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> <i32 0, i32 7, i32 8, i32 15>
ret <4 x float> %z
define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
; RV32-LABEL: vrgather_shuffle_vv_v4f64:
; RV32: # %bb.0:
-; RV32-NEXT: addi a0, zero, 1
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT: vmv.s.x v25, a0
-; RV32-NEXT: vmv.v.i v28, 0
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, tu, mu
-; RV32-NEXT: vslideup.vi v28, v25, 3
; RV32-NEXT: lui a0, %hi(.LCPI6_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI6_0)
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vle16.v v25, (a0)
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT: vrgatherei16.vv v26, v8, v25
; RV32-NEXT: addi a0, zero, 8
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu
-; RV32-NEXT: vrgatherei16.vv v26, v10, v28, v0.t
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-NEXT: vmv.v.i v25, 1
+; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
+; RV32-NEXT: vrgatherei16.vv v26, v10, v25, v0.t
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_shuffle_vv_v4f64:
; RV64: # %bb.0:
-; RV64-NEXT: addi a0, zero, 1
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT: vmv.s.x v26, a0
-; RV64-NEXT: vmv.v.i v28, 0
-; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
-; RV64-NEXT: vslideup.vi v28, v26, 3
; RV64-NEXT: lui a0, %hi(.LCPI6_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI6_0)
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; RV64-NEXT: vle64.v v30, (a0)
-; RV64-NEXT: vrgather.vv v26, v8, v30
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT: vle64.v v28, (a0)
+; RV64-NEXT: vrgather.vv v26, v8, v28
; RV64-NEXT: addi a0, zero, 8
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 4, e64, m2, tu, mu
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT: vmv.v.i v28, 1
+; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT: vrgather.vv v26, v10, v28, v0.t
; RV64-NEXT: vmv2r.v v8, v26
; RV64-NEXT: ret
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: lui a0, %hi(.LCPI7_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT: vle16.v v25, (a0)
-; RV32-NEXT: lui a0, %hi(.LCPI7_1)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI7_1)
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v26, (a0), zero
-; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
+; RV32-NEXT: lui a0, 16
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: vmv.v.x v25, a0
+; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu
; RV32-NEXT: vrgatherei16.vv v26, v8, v25, v0.t
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_shuffle_xv_v4f64:
; RV64: # %bb.0:
+; RV64-NEXT: addi a0, zero, 2
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT: vmv.s.x v26, a0
+; RV64-NEXT: vmv.v.i v28, 1
+; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, mu
+; RV64-NEXT: vslideup.vi v28, v26, 2
; RV64-NEXT: addi a0, zero, 12
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: lui a0, %hi(.LCPI7_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0)
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT: vle64.v v28, (a0)
-; RV64-NEXT: lui a0, %hi(.LCPI7_1)
-; RV64-NEXT: addi a0, a0, %lo(.LCPI7_1)
; RV64-NEXT: vlse64.v v26, (a0), zero
; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
; RV64-NEXT: vrgather.vv v26, v8, v28, v0.t
; RV32-LABEL: vrgather_shuffle_vx_v4f64:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, zero, 3
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT: vmv.s.x v25, a0
-; RV32-NEXT: vmv.v.i v28, 0
-; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, mu
-; RV32-NEXT: vslideup.vi v28, v25, 1
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: lui a0, %hi(.LCPI8_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI8_0)
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v26, (a0), zero
-; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
-; RV32-NEXT: vrgatherei16.vv v26, v8, v28, v0.t
+; RV32-NEXT: lui a0, 48
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: vmv.v.x v25, a0
+; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu
+; RV32-NEXT: vrgatherei16.vv v26, v8, v25, v0.t
; RV32-NEXT: vmv2r.v v8, v26
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_shuffle_vx_v4f64:
; RV64: # %bb.0:
-; RV64-NEXT: addi a0, zero, 3
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT: vmv.s.x v26, a0
-; RV64-NEXT: vmv.v.i v28, 0
-; RV64-NEXT: vsetivli zero, 2, e64, m2, tu, mu
-; RV64-NEXT: vslideup.vi v28, v26, 1
+; RV64-NEXT: vmv.v.i v28, 3
+; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
+; RV64-NEXT: vmv.s.x v28, zero
+; RV64-NEXT: addi a0, zero, 3
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: lui a0, %hi(.LCPI8_0)
define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, zero, 1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vmv.v.i v26, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, mu
-; CHECK-NEXT: vslideup.vi v26, v25, 3
; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vle16.v v27, (a0)
-; CHECK-NEXT: vrgather.vv v25, v8, v27
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v26, (a0)
+; CHECK-NEXT: vrgather.vv v25, v8, v26
; CHECK-NEXT: addi a0, zero, 8
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, mu
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vmv.v.i v26, 1
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, mu
; CHECK-NEXT: vrgather.vv v25, v9, v26, v0.t
; CHECK-NEXT: vmv1r.v v8, v25
; CHECK-NEXT: ret
}
define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) {
-; CHECK-LABEL: vrgather_shuffle_xv_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, zero, 12
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
-; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0)
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT: vle16.v v26, (a0)
-; CHECK-NEXT: vmv.v.i v25, 5
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, mu
-; CHECK-NEXT: vrgather.vv v25, v8, v26, v0.t
-; CHECK-NEXT: vmv1r.v v8, v25
-; CHECK-NEXT: ret
+; RV32-LABEL: vrgather_shuffle_xv_v4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a0, zero, 12
+; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: lui a0, 16
+; RV32-NEXT: addi a0, a0, 2
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: vmv.v.x v26, a0
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-NEXT: vmv.v.i v25, 5
+; RV32-NEXT: vsetvli zero, zero, e16, mf2, tu, mu
+; RV32-NEXT: vrgather.vv v25, v8, v26, v0.t
+; RV32-NEXT: vmv1r.v v8, v25
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrgather_shuffle_xv_v4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a0, zero, 12
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-NEXT: vmv.s.x v0, a0
+; RV64-NEXT: lui a0, 16
+; RV64-NEXT: addiw a0, a0, 2
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vmv.v.x v26, a0
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV64-NEXT: vmv.v.i v25, 5
+; RV64-NEXT: vsetvli zero, zero, e16, mf2, tu, mu
+; RV64-NEXT: vrgather.vv v25, v8, v26, v0.t
+; RV64-NEXT: vmv1r.v v8, v25
+; RV64-NEXT: ret
%s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
ret <4 x i16> %s
}
; CHECK-LABEL: vrgather_shuffle_vx_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, zero, 3
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT: vmv.s.x v25, a0
-; CHECK-NEXT: vmv.v.i v26, 0
-; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, mu
-; CHECK-NEXT: vslideup.vi v26, v25, 1
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: lui a0, 48
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vmv.v.x v26, a0
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vmv.v.i v25, 5
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, mu
; RV32-NEXT: addi a0, zero, 5
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vmv.s.x v25, a0
-; RV32-NEXT: addi a0, zero, 36
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV32-NEXT: vmv.v.i v26, 0
-; RV32-NEXT: vmerge.vim v26, v26, 2, v0
+; RV32-NEXT: vmv.v.i v26, 2
; RV32-NEXT: vsetvli zero, zero, e16, m1, tu, mu
; RV32-NEXT: vslideup.vi v26, v25, 7
; RV32-NEXT: lui a0, %hi(.LCPI11_0)
; RV64-NEXT: addi a0, zero, 5
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vmv.s.x v28, a0
-; RV64-NEXT: addi a0, zero, 36
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: vmerge.vim v16, v16, 2, v0
+; RV64-NEXT: vmv.v.i v16, 2
; RV64-NEXT: vsetvli zero, zero, e64, m4, tu, mu
; RV64-NEXT: vslideup.vi v16, v28, 7
; RV64-NEXT: lui a0, %hi(.LCPI11_0)
define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-LABEL: vrgather_shuffle_xv_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: addi a0, zero, 6
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV32-NEXT: vmv.s.x v25, a0
-; RV32-NEXT: addi a0, zero, 4
-; RV32-NEXT: vmv.s.x v26, a0
-; RV32-NEXT: vmv.v.i v27, 0
-; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; RV32-NEXT: vslideup.vi v27, v26, 5
-; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, mu
-; RV32-NEXT: vslideup.vi v27, v25, 6
; RV32-NEXT: lui a0, %hi(.LCPI12_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: addi a0, zero, 113
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 8, e64, m4, tu, mu
-; RV32-NEXT: vrgatherei16.vv v28, v8, v27, v0.t
+; RV32-NEXT: lui a0, %hi(.LCPI12_1)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1)
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT: vle16.v v25, (a0)
+; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, mu
+; RV32-NEXT: vrgatherei16.vv v28, v8, v25, v0.t
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_shuffle_xv_v8i64:
; RV64: # %bb.0:
-; RV64-NEXT: addi a0, zero, 6
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vmv.s.x v28, a0
-; RV64-NEXT: addi a0, zero, 4
-; RV64-NEXT: vmv.s.x v12, a0
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, mu
-; RV64-NEXT: vslideup.vi v16, v12, 5
-; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, mu
-; RV64-NEXT: vslideup.vi v16, v28, 6
; RV64-NEXT: addi a0, zero, 113
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64-NEXT: vmv.s.x v0, a0
+; RV64-NEXT: lui a0, %hi(.LCPI12_0)
+; RV64-NEXT: addi a0, a0, %lo(.LCPI12_0)
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
+; RV64-NEXT: vle64.v v12, (a0)
; RV64-NEXT: vmv.v.i v28, -1
; RV64-NEXT: vsetvli zero, zero, e64, m4, tu, mu
-; RV64-NEXT: vrgather.vv v28, v8, v16, v0.t
+; RV64-NEXT: vrgather.vv v28, v8, v12, v0.t
; RV64-NEXT: vmv4r.v v8, v28
; RV64-NEXT: ret
%s = shufflevector <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x, <8 x i32> <i32 8, i32 3, i32 6, i32 5, i32 8, i32 12, i32 14, i32 3>
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vrgather.vi v25, v8, 1
-; CHECK-NEXT: addi a1, zero, 1
-; CHECK-NEXT: vmv.s.x v26, a1
-; CHECK-NEXT: vmv.v.i v27, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, mu
-; CHECK-NEXT: vslideup.vi v27, v26, 3
+; CHECK-NEXT: vmv.s.x v26, zero
+; CHECK-NEXT: vmv.v.i v27, 1
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, mu
+; CHECK-NEXT: vslideup.vi v27, v26, 1
; CHECK-NEXT: addi a1, zero, 10
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; CHECK-NEXT: vmv.s.x v0, a1
; RV64-1024-NEXT: vslideup.vi v28, v8, 0
; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, tu, mu
; RV64-1024-NEXT: vslideup.vx v28, v12, a3
-; RV64-1024-NEXT: lui a2, %hi(.LCPI0_1)
-; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI0_1)
; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64-1024-NEXT: vle16.v v12, (a2)
+; RV64-1024-NEXT: vid.v v12
; RV64-1024-NEXT: vrgather.vv v8, v24, v12
; RV64-1024-NEXT: lui a2, 1026731
; RV64-1024-NEXT: addiw a2, a2, -1365
; RV64-1024-NEXT: vslideup.vi v0, v25, 2
; RV64-1024-NEXT: vsetivli zero, 4, e64, m1, tu, mu
; RV64-1024-NEXT: vslideup.vi v0, v25, 3
-; RV64-1024-NEXT: lui a2, %hi(.LCPI0_2)
-; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI0_2)
+; RV64-1024-NEXT: lui a2, %hi(.LCPI0_1)
+; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI0_1)
; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; RV64-1024-NEXT: vle16.v v12, (a2)
; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, tu, mu
; RV64-2048-NEXT: vslideup.vi v26, v30, 0
; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, tu, mu
; RV64-2048-NEXT: vslideup.vx v26, v28, a3
-; RV64-2048-NEXT: lui a2, %hi(.LCPI0_1)
-; RV64-2048-NEXT: addi a2, a2, %lo(.LCPI0_1)
; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV64-2048-NEXT: vle16.v v28, (a2)
+; RV64-2048-NEXT: vid.v v28
; RV64-2048-NEXT: vrgather.vv v30, v12, v28
; RV64-2048-NEXT: lui a2, 1026731
; RV64-2048-NEXT: addiw a2, a2, -1365
; RV64-2048-NEXT: vslideup.vi v0, v25, 2
; RV64-2048-NEXT: vsetivli zero, 4, e64, m1, tu, mu
; RV64-2048-NEXT: vslideup.vi v0, v25, 3
-; RV64-2048-NEXT: lui a2, %hi(.LCPI0_2)
-; RV64-2048-NEXT: addi a2, a2, %lo(.LCPI0_2)
+; RV64-2048-NEXT: lui a2, %hi(.LCPI0_1)
+; RV64-2048-NEXT: addi a2, a2, %lo(.LCPI0_1)
; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; RV64-2048-NEXT: vle16.v v28, (a2)
; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, tu, mu
; RV64-1024-NEXT: addi a2, sp, 16
; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload
; RV64-1024-NEXT: vslideup.vx v8, v16, a3
-; RV64-1024-NEXT: lui a2, %hi(.LCPI1_1)
-; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI1_1)
; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; RV64-1024-NEXT: vle16.v v24, (a2)
+; RV64-1024-NEXT: vid.v v24
; RV64-1024-NEXT: csrr a2, vlenb
; RV64-1024-NEXT: slli a2, a2, 3
; RV64-1024-NEXT: add a2, sp, a2
; RV64-1024-NEXT: vslideup.vi v0, v25, 6
; RV64-1024-NEXT: vsetivli zero, 8, e64, m1, tu, mu
; RV64-1024-NEXT: vslideup.vi v0, v25, 7
-; RV64-1024-NEXT: lui a2, %hi(.LCPI1_2)
-; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI1_2)
+; RV64-1024-NEXT: lui a2, %hi(.LCPI1_1)
+; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI1_1)
; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; RV64-1024-NEXT: vle16.v v24, (a2)
; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, tu, mu
; RV64-2048-NEXT: vslideup.vi v28, v8, 0
; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, tu, mu
; RV64-2048-NEXT: vslideup.vx v28, v12, a3
-; RV64-2048-NEXT: lui a2, %hi(.LCPI1_1)
-; RV64-2048-NEXT: addi a2, a2, %lo(.LCPI1_1)
; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; RV64-2048-NEXT: vle16.v v12, (a2)
+; RV64-2048-NEXT: vid.v v12
; RV64-2048-NEXT: vrgather.vv v8, v24, v12
; RV64-2048-NEXT: lui a2, 1026731
; RV64-2048-NEXT: addiw a2, a2, -1365
; RV64-2048-NEXT: vslideup.vi v0, v25, 6
; RV64-2048-NEXT: vsetivli zero, 8, e64, m1, tu, mu
; RV64-2048-NEXT: vslideup.vi v0, v25, 7
-; RV64-2048-NEXT: lui a2, %hi(.LCPI1_2)
-; RV64-2048-NEXT: addi a2, a2, %lo(.LCPI1_2)
+; RV64-2048-NEXT: lui a2, %hi(.LCPI1_1)
+; RV64-2048-NEXT: addi a2, a2, %lo(.LCPI1_1)
; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; RV64-2048-NEXT: vle16.v v12, (a2)
; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, tu, mu