if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
if (Idx->getAPIntValue() == SplatIndex)
return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+
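+ // When splatting lane 0 on a little-endian target, look through a bitcast
+ // whose source is a single-use scalar_to_vector or build_vector, and splat
+ // that node's first operand (zero-extended or truncated as needed) instead.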
+ if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
+ SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
+ (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
+ VT.isInteger() && N00VT.isInteger()) {
+ EVT InVT =
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
+ SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
+ SDLoc(N), InVT);
+ return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
+ }
+ }
}
// If this is a bit convert that changes the element type of the vector but
define <8 x i16> @bitcast_i64_v8i16(i64 %a) {
; CHECK-LABEL: bitcast_i64_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x0
-; CHECK-NEXT: dup.8h v0, v0[0]
+; CHECK-NEXT: dup.8h v0, w0
; CHECK-NEXT: ret
%b = bitcast i64 %a to <4 x i16>
%r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
; CHECK-LABEL: cmplx_mul_combined_re_im:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: lsr x8, x0, #16
+; CHECK-NEXT: adrp x9, .LCPI196_0
; CHECK-NEXT: fmov d4, x0
; CHECK-NEXT: rev32 v5.8h, v0.8h
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: adrp x8, .LCPI196_0
-; CHECK-NEXT: dup v1.8h, v1.h[0]
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI196_0]
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI196_0]
; CHECK-NEXT: sqneg v2.8h, v1.8h
; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-NEXT: sqdmull v2.4s, v0.4h, v4.h[0]
define arm_aapcs_vfpcc <8 x i16> @bitcast_i128_v8i16(i128 %a) {
; CHECK-LE-LABEL: bitcast_i128_v8i16:
; CHECK-LE: @ %bb.0:
-; CHECK-LE-NEXT: vmov.32 q0[0], r0
-; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
define arm_aapcs_vfpcc <8 x i16> @other_max_case(i32 %blockSize) {
; CHECK-LE-LABEL: other_max_case:
; CHECK-LE: @ %bb.0:
-; CHECK-LE-NEXT: vmov.32 q0[0], r0
-; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
; Test that a splat shuffle of an fp-to-int bitcasted vector correctly
-; optimizes and lowers to a single splat instruction. Without a custom
-; DAG combine, this ends up doing both a splat and a shuffle.
+; optimizes and lowers to a single splat instruction.
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: not_a_vec:
; CHECK-NEXT: .functype not_a_vec (i64, i64) -> (v128){{$}}
-; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L1]], $2, 0, 1, 2, 3
+; CHECK-NEXT: i32.wrap_i64 $push[[L:[0-9]+]]=, $0
+; CHECK-NEXT: i32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]
; CHECK-NEXT: return $pop[[R]]
define <4 x i32> @not_a_vec(i128 %x) {
%a = bitcast i128 %x to <4 x i32>
; X64-LABEL: test2:
; X64: ## %bb.0: ## %entry
; X64-NEXT: movq _tmp_V2i@GOTPCREL(%rip), %rax
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: movq %xmm0, (%rax)
; X64-NEXT: retq
entry: