TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_low_s_i16x8_i8x16, "V8sV16c", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_high_s_i16x8_i8x16, "V8sV16c", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_low_u_i16x8_i8x16, "V8sV16c", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_high_u_i16x8_i8x16, "V8sV16c", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_low_s_i32x4_i16x8, "V4iV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_high_s_i32x4_i16x8, "V4iV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_low_u_i32x4_i16x8, "V4iV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_high_u_i32x4_i16x8, "V4iV8s", "nc", "simd128")
-
#undef BUILTIN
#undef TARGET_BUILTIN
CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
return Builder.CreateCall(Callee, {Low, High});
}
- case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
- case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
- case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
- case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
- IntNo = Intrinsic::wasm_widen_low_signed;
- break;
- case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
- IntNo = Intrinsic::wasm_widen_high_signed;
- break;
- case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
- IntNo = Intrinsic::wasm_widen_low_unsigned;
- break;
- case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8:
- IntNo = Intrinsic::wasm_widen_high_unsigned;
- break;
- default:
- llvm_unreachable("unexpected builtin ID");
- }
- Function *Callee =
- CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()});
- return Builder.CreateCall(Callee, Vec);
- }
case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {
Value *Ops[18];
size_t OpIdx = 0;
typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16)));
typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16)));
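+// Half-width vector types used as intermediate operands of
+// __builtin_convertvector in the widening intrinsics below.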
+typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef unsigned char __u8x8
+ __attribute__((__vector_size__(8), __aligned__(8)));
+typedef short __i16x4 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef unsigned short __u16x4
+ __attribute__((__vector_size__(8), __aligned__(8)));
+
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
__min_vector_width__(128)))
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_widen_low_i8x16(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_low_s_i16x8_i8x16((__i8x16)__a);
+ return (v128_t) __builtin_convertvector(
+ (__i8x8){((__i8x16)__a)[0], ((__i8x16)__a)[1], ((__i8x16)__a)[2],
+ ((__i8x16)__a)[3], ((__i8x16)__a)[4], ((__i8x16)__a)[5],
+ ((__i8x16)__a)[6], ((__i8x16)__a)[7]},
+ __i16x8);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_widen_high_i8x16(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_high_s_i16x8_i8x16((__i8x16)__a);
+ return (v128_t) __builtin_convertvector(
+ (__i8x8){((__i8x16)__a)[8], ((__i8x16)__a)[9], ((__i8x16)__a)[10],
+ ((__i8x16)__a)[11], ((__i8x16)__a)[12], ((__i8x16)__a)[13],
+ ((__i8x16)__a)[14], ((__i8x16)__a)[15]},
+ __i16x8);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_widen_low_u8x16(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_low_u_i16x8_i8x16((__i8x16)__a);
+ return (v128_t) __builtin_convertvector(
+ (__u8x8){((__u8x16)__a)[0], ((__u8x16)__a)[1], ((__u8x16)__a)[2],
+ ((__u8x16)__a)[3], ((__u8x16)__a)[4], ((__u8x16)__a)[5],
+ ((__u8x16)__a)[6], ((__u8x16)__a)[7]},
+ __u16x8);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_widen_high_u8x16(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_high_u_i16x8_i8x16((__i8x16)__a);
+ return (v128_t) __builtin_convertvector(
+ (__u8x8){((__u8x16)__a)[8], ((__u8x16)__a)[9], ((__u8x16)__a)[10],
+ ((__u8x16)__a)[11], ((__u8x16)__a)[12], ((__u8x16)__a)[13],
+ ((__u8x16)__a)[14], ((__u8x16)__a)[15]},
+ __u16x8);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_widen_low_i16x8(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_low_s_i32x4_i16x8((__i16x8)__a);
+ return (v128_t) __builtin_convertvector(
+ (__i16x4){((__i16x8)__a)[0], ((__i16x8)__a)[1], ((__i16x8)__a)[2],
+ ((__i16x8)__a)[3]},
+ __i32x4);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_widen_high_i16x8(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_high_s_i32x4_i16x8((__i16x8)__a);
+ return (v128_t) __builtin_convertvector(
+ (__i16x4){((__i16x8)__a)[4], ((__i16x8)__a)[5], ((__i16x8)__a)[6],
+ ((__i16x8)__a)[7]},
+ __i32x4);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_widen_low_u16x8(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_low_u_i32x4_i16x8((__i16x8)__a);
+ return (v128_t) __builtin_convertvector(
+ (__u16x4){((__u16x8)__a)[0], ((__u16x8)__a)[1], ((__u16x8)__a)[2],
+ ((__u16x8)__a)[3]},
+ __u32x4);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_widen_high_u16x8(v128_t __a) {
- return (v128_t)__builtin_wasm_widen_high_u_i32x4_i16x8((__i16x8)__a);
+ return (v128_t) __builtin_convertvector(
+ (__u16x4){((__u16x8)__a)[4], ((__u16x8)__a)[5], ((__u16x8)__a)[6],
+ ((__u16x8)__a)[7]},
+ __u32x4);
}
// Undefine helper macros
// WEBASSEMBLY: ret
}
-i16x8 widen_low_s_i16x8_i8x16(i8x16 v) {
- return __builtin_wasm_widen_low_s_i16x8_i8x16(v);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.widen.low.signed.v8i16.v16i8(<16 x i8> %v)
- // WEBASSEMBLY: ret
-}
-
-i16x8 widen_high_s_i16x8_i8x16(i8x16 v) {
- return __builtin_wasm_widen_high_s_i16x8_i8x16(v);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.widen.high.signed.v8i16.v16i8(<16 x i8> %v)
- // WEBASSEMBLY: ret
-}
-
-i16x8 widen_low_u_i16x8_i8x16(i8x16 v) {
- return __builtin_wasm_widen_low_u_i16x8_i8x16(v);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.widen.low.unsigned.v8i16.v16i8(<16 x i8> %v)
- // WEBASSEMBLY: ret
-}
-
-i16x8 widen_high_u_i16x8_i8x16(i8x16 v) {
- return __builtin_wasm_widen_high_u_i16x8_i8x16(v);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.widen.high.unsigned.v8i16.v16i8(<16 x i8> %v)
- // WEBASSEMBLY: ret
-}
-
-i32x4 widen_low_s_i32x4_i16x8(i16x8 v) {
- return __builtin_wasm_widen_low_s_i32x4_i16x8(v);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.low.signed.v4i32.v8i16(<8 x i16> %v)
- // WEBASSEMBLY: ret
-}
-
-i32x4 widen_high_s_i32x4_i16x8(i16x8 v) {
- return __builtin_wasm_widen_high_s_i32x4_i16x8(v);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.high.signed.v4i32.v8i16(<8 x i16> %v)
- // WEBASSEMBLY: ret
-}
-
-i32x4 widen_low_u_i32x4_i16x8(i16x8 v) {
- return __builtin_wasm_widen_low_u_i32x4_i16x8(v);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.low.unsigned.v4i32.v8i16(<8 x i16> %v)
- // WEBASSEMBLY: ret
-}
-
-i32x4 widen_high_u_i32x4_i16x8(i16x8 v) {
- return __builtin_wasm_widen_high_u_i32x4_i16x8(v);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.high.unsigned.v4i32.v8i16(<8 x i16> %v)
- // WEBASSEMBLY: ret
-}
-
i8x16 swizzle_v8x16(i8x16 x, i8x16 y) {
return __builtin_wasm_swizzle_v8x16(x, y);
// WEBASSEMBLY: call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %x, <16 x i8> %y)
Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem, IntrSpeculatable]>;
-def int_wasm_widen_low_signed :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_widen_high_signed :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_widen_low_unsigned :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_widen_high_unsigned :
- Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty],
- [IntrNoMem, IntrSpeculatable]>;
// TODO: Replace these intrinsics with normal ISel patterns
def int_wasm_pmin :
HANDLE_NODETYPE(VEC_SHL)
HANDLE_NODETYPE(VEC_SHR_S)
HANDLE_NODETYPE(VEC_SHR_U)
+HANDLE_NODETYPE(WIDEN_LOW_S)
+HANDLE_NODETYPE(WIDEN_LOW_U)
+HANDLE_NODETYPE(WIDEN_HIGH_S)
+HANDLE_NODETYPE(WIDEN_HIGH_U)
HANDLE_NODETYPE(THROW)
HANDLE_NODETYPE(MEMORY_COPY)
HANDLE_NODETYPE(MEMORY_FILL)
// Hoist bitcasts out of shuffles
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ // Combine extends of extract_subvectors into widening ops
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+
// Support saturating add for i8x16 and i16x8
for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
for (auto T : {MVT::v16i8, MVT::v8i16})
return DAG.getBitcast(DstType, NewShuffle);
}
+static SDValue performVectorWidenCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ auto &DAG = DCI.DAG;
+ assert(N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND);
+
+ // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
+ // possible before the extract_subvector can be expanded.
+ auto Extract = N->getOperand(0);
+ if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+ auto Source = Extract.getOperand(0);
+ auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+ if (IndexNode == nullptr)
+ return SDValue();
+ auto Index = IndexNode->getZExtValue();
+
+ // Only v8i8 and v4i16 extracts can be widened, and only if the extracted
+ // subvector is the low or high half of its source.
+ EVT ResVT = N->getValueType(0);
+ if (ResVT == MVT::v8i16) {
+ if (Extract.getValueType() != MVT::v8i8 ||
+ Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
+ return SDValue();
+ } else if (ResVT == MVT::v4i32) {
+ if (Extract.getValueType() != MVT::v4i16 ||
+ Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
+ bool IsLow = Index == 0;
+
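+  // Map the extension kind and the extracted half to the matching target node.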
+ unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::WIDEN_LOW_S
+ : WebAssemblyISD::WIDEN_HIGH_S)
+ : (IsLow ? WebAssemblyISD::WIDEN_LOW_U
+ : WebAssemblyISD::WIDEN_HIGH_U);
+
+ return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+}
+
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
return SDValue();
case ISD::VECTOR_SHUFFLE:
return performVECTOR_SHUFFLECombine(N, DCI);
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return performVectorWidenCombine(N, DCI);
}
}
(fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
// Widening operations
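+// Target DAG nodes produced by the sign/zero-extend combine in
+// WebAssemblyISelLowering.cpp; they replace the former int_wasm_widen_*
+// intrinsics in the patterns below.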
+def widen_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def widen_low_s : SDNode<"WebAssemblyISD::WIDEN_LOW_S", widen_t>;
+def widen_high_s : SDNode<"WebAssemblyISD::WIDEN_HIGH_S", widen_t>;
+def widen_low_u : SDNode<"WebAssemblyISD::WIDEN_LOW_U", widen_t>;
+def widen_high_u : SDNode<"WebAssemblyISD::WIDEN_HIGH_U", widen_t>;
+
multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
bits<32> baseInst> {
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_signed,
+ defm "" : SIMDConvert<vec_t, arg_t, widen_low_s,
vec#".widen_low_"#arg#"_s", baseInst>;
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_signed,
+ defm "" : SIMDConvert<vec_t, arg_t, widen_high_s,
vec#".widen_high_"#arg#"_s", !add(baseInst, 1)>;
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_unsigned,
+ defm "" : SIMDConvert<vec_t, arg_t, widen_low_u,
vec#".widen_low_"#arg#"_u", !add(baseInst, 2)>;
- defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_unsigned,
+ defm "" : SIMDConvert<vec_t, arg_t, widen_high_u,
vec#".widen_high_"#arg#"_u", !add(baseInst, 3)>;
}
ret <8 x i16> %a
}
-; CHECK-LABEL: widen_low_signed_v8i16:
-; SIMD128-NEXT: .functype widen_low_signed_v8i16 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.widen_low_i8x16_s $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.widen.low.signed.v8i16.v16i8(<16 x i8>)
-define <8 x i16> @widen_low_signed_v8i16(<16 x i8> %v) {
- %a = call <8 x i16> @llvm.wasm.widen.low.signed.v8i16.v16i8(<16 x i8> %v)
- ret <8 x i16> %a
-}
-
-; CHECK-LABEL: widen_high_signed_v8i16:
-; SIMD128-NEXT: .functype widen_high_signed_v8i16 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.widen_high_i8x16_s $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.widen.high.signed.v8i16.v16i8(<16 x i8>)
-define <8 x i16> @widen_high_signed_v8i16(<16 x i8> %v) {
- %a = call <8 x i16> @llvm.wasm.widen.high.signed.v8i16.v16i8(<16 x i8> %v)
- ret <8 x i16> %a
-}
-
-; CHECK-LABEL: widen_low_unsigned_v8i16:
-; SIMD128-NEXT: .functype widen_low_unsigned_v8i16 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.widen_low_i8x16_u $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.widen.low.unsigned.v8i16.v16i8(<16 x i8>)
-define <8 x i16> @widen_low_unsigned_v8i16(<16 x i8> %v) {
- %a = call <8 x i16> @llvm.wasm.widen.low.unsigned.v8i16.v16i8(<16 x i8> %v)
- ret <8 x i16> %a
-}
-
-; CHECK-LABEL: widen_high_unsigned_v8i16:
-; SIMD128-NEXT: .functype widen_high_unsigned_v8i16 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.widen_high_i8x16_u $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.widen.high.unsigned.v8i16.v16i8(<16 x i8>)
-define <8 x i16> @widen_high_unsigned_v8i16(<16 x i8> %v) {
- %a = call <8 x i16> @llvm.wasm.widen.high.unsigned.v8i16.v16i8(<16 x i8> %v)
- ret <8 x i16> %a
-}
-
; ==============================================================================
; 4 x i32
; ==============================================================================
ret <4 x i32> %a
}
-; CHECK-LABEL: widen_low_signed_v4i32:
-; SIMD128-NEXT: .functype widen_low_signed_v4i32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i32x4.widen_low_i16x8_s $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.low.signed.v4i32.v8i16(<8 x i16>)
-define <4 x i32> @widen_low_signed_v4i32(<8 x i16> %v) {
- %a = call <4 x i32> @llvm.wasm.widen.low.signed.v4i32.v8i16(<8 x i16> %v)
- ret <4 x i32> %a
-}
-
-; CHECK-LABEL: widen_high_signed_v4i32:
-; SIMD128-NEXT: .functype widen_high_signed_v4i32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i32x4.widen_high_i16x8_s $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.high.signed.v4i32.v8i16(<8 x i16>)
-define <4 x i32> @widen_high_signed_v4i32(<8 x i16> %v) {
- %a = call <4 x i32> @llvm.wasm.widen.high.signed.v4i32.v8i16(<8 x i16> %v)
- ret <4 x i32> %a
-}
-
-; CHECK-LABEL: widen_low_unsigned_v4i32:
-; SIMD128-NEXT: .functype widen_low_unsigned_v4i32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i32x4.widen_low_i16x8_u $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.low.unsigned.v4i32.v8i16(<8 x i16>)
-define <4 x i32> @widen_low_unsigned_v4i32(<8 x i16> %v) {
- %a = call <4 x i32> @llvm.wasm.widen.low.unsigned.v4i32.v8i16(<8 x i16> %v)
- ret <4 x i32> %a
-}
-
-; CHECK-LABEL: widen_high_unsigned_v4i32:
-; SIMD128-NEXT: .functype widen_high_unsigned_v4i32 (v128) -> (v128){{$}}
-; SIMD128-NEXT: i32x4.widen_high_i16x8_u $push[[R:[0-9]+]]=, $0{{$}}
-; SIMD128-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.high.unsigned.v4i32.v8i16(<8 x i16>)
-define <4 x i32> @widen_high_unsigned_v4i32(<8 x i16> %v) {
- %a = call <4 x i32> @llvm.wasm.widen.high.unsigned.v4i32.v8i16(<8 x i16> %v)
- ret <4 x i32> %a
-}
-
; ==============================================================================
; 2 x i64
; ==============================================================================
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+simd128 | FileCheck %s
+
+;; Test that SIMD widening operations can be successfully selected
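+;; Each test extracts the low or high half of a vector with a shufflevector
+;; and extends it; the combined pattern should select to a single widen
+;; instruction.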
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+define <8 x i16> @widen_low_i8x16_s(<16 x i8> %v) {
+; CHECK-LABEL: widen_low_i8x16_s:
+; CHECK: .functype widen_low_i8x16_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.widen_low_i8x16_s
+; CHECK-NEXT: # fallthrough-return
+ %low = shufflevector <16 x i8> %v, <16 x i8> undef,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %widened = sext <8 x i8> %low to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <8 x i16> @widen_low_i8x16_u(<16 x i8> %v) {
+; CHECK-LABEL: widen_low_i8x16_u:
+; CHECK: .functype widen_low_i8x16_u (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.widen_low_i8x16_u
+; CHECK-NEXT: # fallthrough-return
+ %low = shufflevector <16 x i8> %v, <16 x i8> undef,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %widened = zext <8 x i8> %low to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <8 x i16> @widen_high_i8x16_s(<16 x i8> %v) {
+; CHECK-LABEL: widen_high_i8x16_s:
+; CHECK: .functype widen_high_i8x16_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.widen_high_i8x16_s
+; CHECK-NEXT: # fallthrough-return
+  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
+    <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %widened = sext <8 x i8> %high to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <8 x i16> @widen_high_i8x16_u(<16 x i8> %v) {
+; CHECK-LABEL: widen_high_i8x16_u:
+; CHECK: .functype widen_high_i8x16_u (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.widen_high_i8x16_u
+; CHECK-NEXT: # fallthrough-return
+  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
+    <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %widened = zext <8 x i8> %high to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <4 x i32> @widen_low_i16x8_s(<8 x i16> %v) {
+; CHECK-LABEL: widen_low_i16x8_s:
+; CHECK: .functype widen_low_i16x8_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.widen_low_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+ %low = shufflevector <8 x i16> %v, <8 x i16> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %widened = sext <4 x i16> %low to <4 x i32>
+ ret <4 x i32> %widened
+}
+
+define <4 x i32> @widen_low_i16x8_u(<8 x i16> %v) {
+; CHECK-LABEL: widen_low_i16x8_u:
+; CHECK: .functype widen_low_i16x8_u (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.widen_low_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %low = shufflevector <8 x i16> %v, <8 x i16> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %widened = zext <4 x i16> %low to <4 x i32>
+ ret <4 x i32> %widened
+}
+
+define <4 x i32> @widen_high_i16x8_s(<8 x i16> %v) {
+; CHECK-LABEL: widen_high_i16x8_s:
+; CHECK: .functype widen_high_i16x8_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.widen_high_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
+    <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %widened = sext <4 x i16> %high to <4 x i32>
+ ret <4 x i32> %widened
+}
+
+define <4 x i32> @widen_high_i16x8_u(<8 x i16> %v) {
+; CHECK-LABEL: widen_high_i16x8_u:
+; CHECK: .functype widen_high_i16x8_u (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.widen_high_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
+    <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %widened = zext <4 x i16> %high to <4 x i32>
+ ret <4 x i32> %widened
+}
+
+;; Also test that similar patterns with offsets not corresponding to
+;; the low or high half are correctly expanded.
+
+define <8 x i16> @widen_lowish_i8x16_s(<16 x i8> %v) {
+; CHECK-LABEL: widen_lowish_i8x16_s:
+; CHECK: .functype widen_lowish_i8x16_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 1
+; CHECK-NEXT: i16x8.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 2
+; CHECK-NEXT: i16x8.replace_lane 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 3
+; CHECK-NEXT: i16x8.replace_lane 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 4
+; CHECK-NEXT: i16x8.replace_lane 3
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 5
+; CHECK-NEXT: i16x8.replace_lane 4
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 6
+; CHECK-NEXT: i16x8.replace_lane 5
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 7
+; CHECK-NEXT: i16x8.replace_lane 6
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.extract_lane_u 8
+; CHECK-NEXT: i16x8.replace_lane 7
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i16x8.shl
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i16x8.shr_s
+; CHECK-NEXT: # fallthrough-return
+ %lowish = shufflevector <16 x i8> %v, <16 x i8> undef,
+ <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %widened = sext <8 x i8> %lowish to <8 x i16>
+ ret <8 x i16> %widened
+}
+
+define <4 x i32> @widen_lowish_i16x8_s(<8 x i16> %v) {
+; CHECK-LABEL: widen_lowish_i16x8_s:
+; CHECK: .functype widen_lowish_i16x8_s (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.extract_lane_u 1
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.extract_lane_u 2
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.extract_lane_u 3
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.extract_lane_u 4
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32x4.shl
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32x4.shr_s
+; CHECK-NEXT: # fallthrough-return
+ %lowish = shufflevector <8 x i16> %v, <8 x i16> undef,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ %widened = sext <4 x i16> %lowish to <4 x i32>
+ ret <4 x i32> %widened
+}