With fix for AArch64 and Hexagon test cases.
int Index0, Index1;
SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+ // Extract element from splat_vector should be free.
+ // TODO: use DAG.isSplatValue instead?
+ bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
+ N1.getOpcode() == ISD::SPLAT_VECTOR;
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
- !TLI.isExtractVecEltCheap(VT, Index0) ||
+ !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
}
// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, ScalarBO);
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
return DAG.getBuildVector(VT, DL, Ops);
}
define <vscale x 4 x i1> @lane_mask_nxv4i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv4i1_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: index z0.s, #0, #1
-; CHECK-NEXT: mov z1.s, w0
; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: and w8, w1, #0xff
; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: mov z1.s, w1
; CHECK-NEXT: umin z0.s, z0.s, #255
-; CHECK-NEXT: and z1.s, z1.s, #0xff
; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i8(i8 %index, i8 %TC)
; CHECK-LABEL: lane_mask_nxv2i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x9, x1, #0xff
; CHECK-NEXT: and z0.d, z0.d, #0xff
-; CHECK-NEXT: and z1.d, z1.d, #0xff
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: mov z2.d, x1
+; CHECK-NEXT: mov z1.d, x9
; CHECK-NEXT: umin z0.d, z0.d, #255
-; CHECK-NEXT: and z2.d, z2.d, #0xff
; CHECK-NEXT: and z0.d, z0.d, #0xff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z0.d
+; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i8(i8 %index, i8 %TC)
ret <vscale x 2 x i1> %active.lane.mask
define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: fmov z2.s, #1.00000000
-; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s2, #1.00000000
+; CHECK-NEXT: fdiv s0, s2, s0
; CHECK-NEXT: mov z0.s, s0
-; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: fmul z0.s, z1.s, z0.s
; CHECK-NEXT: ret
entry:
define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: fmov z4.s, #1.00000000
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z0.s, s0
-; CHECK-NEXT: fdiv z4.s, p0/m, z4.s, z0.s
+; CHECK-NEXT: fmov s4, #1.00000000
+; CHECK-NEXT: fdiv s0, s4, s0
+; CHECK-NEXT: mov z4.s, s0
; CHECK-NEXT: fmul z0.s, z1.s, z4.s
; CHECK-NEXT: fmul z1.s, z2.s, z4.s
; CHECK-NEXT: fmul z2.s, z3.s, z4.s
define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fmov z3.d, #1.00000000
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z0.d, d0
-; CHECK-NEXT: fdiv z3.d, p0/m, z3.d, z0.d
+; CHECK-NEXT: fmov d3, #1.00000000
+; CHECK-NEXT: fdiv d0, d3, d0
+; CHECK-NEXT: mov z3.d, d0
; CHECK-NEXT: fmul z0.d, z1.d, z3.d
; CHECK-NEXT: fmul z1.d, z2.d, z3.d
; CHECK-NEXT: b foo_2_nxv2f64
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_256-LABEL: select_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.s, w9
-; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0
; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s
; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w8
-; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_256-LABEL: select_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.d, x9
-; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0
; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d
; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x8
-; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.s, w9
-; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0
; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s
; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w8
-; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.d, x9
-; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0
; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d
; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x8
-; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
; CHECK-LABEL: scalable_of_fixed_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, x0
-; CHECK-NEXT: add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT: add x8, x0, #1
+; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ret
%idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
; CHECK-LABEL: scalable_of_scalable_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, x0
-; CHECK-NEXT: incd z0.d, all, mul #8
+; CHECK-NEXT: addvl x8, x0, #1
+; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ret
%idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
; This code generates a concat_vectors with more than 2 inputs. Make sure
; that this compiles successfully.
-; CHECK: vlsr
+; CHECK: lsr
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"
;
; RV64-LABEL: vadd_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
;
; RV64-LABEL: vand_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
define <vscale x 1 x i8> @vmul_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
; CHECK-LABEL: vmul_vv_nxv1i8:
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
-; RV64-LABEL: vmul_xx_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmul.vx v8, v8, a1
-; RV64-NEXT: ret
+; RV64NOM-LABEL: vmul_xx_nxv8i64:
+; RV64NOM: # %bb.0:
+; RV64NOM-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64NOM-NEXT: vmv.v.x v8, a0
+; RV64NOM-NEXT: vmul.vx v8, v8, a1
+; RV64NOM-NEXT: ret
+;
+; RV64M-LABEL: vmul_xx_nxv8i64:
+; RV64M: # %bb.0:
+; RV64M-NEXT: mul a0, a0, a1
+; RV64M-NEXT: vsetvli a1, zero, e64, m8, ta, mu
+; RV64M-NEXT: vmv.v.x v8, a0
+; RV64M-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
%head2 = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
;
; RV64-LABEL: vor_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vor.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
;
; RV64-LABEL: vsub_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vsub.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
;
; RV64-LABEL: vxor_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vxor.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer