defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 250>;
defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 251>;
+// Lower llvm.wasm.trunc.saturate.* to saturating instructions
+def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
+          (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
+def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
+          (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
+
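A minimal IR sketch of what these patterns select, assuming the standard
mangled name of the intrinsic (the function name here is illustrative, not
part of this patch):

  declare <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float>)

  define <4 x i32> @trunc_sat_s(<4 x float> %x) {
    ; expected to select to a single i32x4.trunc_sat_f32x4_s
    %r = call <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float> %x)
    ret <4 x i32> %r
  }
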
// Widening operations
multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
                     bits<32> baseInst> {
defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 101>;
defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 133>;
-// Lower llvm.wasm.trunc.saturate.* to saturating instructions
-def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
-          (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
-def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
-          (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
+// Use narrowing operations for truncating stores. Since the narrowing
+// operations are saturating instead of truncating, we need to mask
+// the stored values first.
+// TODO: Use consts instead of splats
+def store_v8i8_trunc_v8i16 :
+  OutPatFrag<(ops node:$val),
+             (EXTRACT_LANE_v2i64
+               (NARROW_U_v16i8
+                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x00ff00ff)), node:$val),
+                 node:$val // Unused input
+               ),
+               0
+             )>;
+
+def store_v4i16_trunc_v4i32 :
+  OutPatFrag<(ops node:$val),
+             (EXTRACT_LANE_v2i64
+               (NARROW_U_v8i16
+                 (AND_v4i32 (SPLAT_v4i32 (CONST_I32 0x0000ffff)), node:$val),
+                 node:$val // Unused input
+               ),
+               0
+             )>;
+
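A worked example of why the mask is needed: the narrow instructions saturate
rather than truncate. An i16 lane holding 0x1234 would come out of
i8x16.narrow_i16x8_u as 0xff (clamped to u8 range), but a truncating store
must produce the low byte 0x34. ANDing with the splatted 0x00ff00ff first
yields 0x1234 & 0x00ff = 0x0034 in each lane, so every lane is already within
u8 range and the unsigned narrow is exact. Lanes 0-7 of the narrowed vector
come from the first operand, which is why the second operand is a don't-care
and i64x2.extract_lane 0 recovers exactly the eight bytes to store.
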
+// Store patterns adapted from WebAssemblyInstrMemory.td
+multiclass NarrowingStorePatNoOffset<ValueType ty, PatFrag node,
+                                     OutPatFrag out> {
+  def : Pat<(node ty:$val, I32:$addr),
+            (STORE_I64_A32 0, 0, I32:$addr, (i64 (out ty:$val)))>,
+        Requires<[HasAddr32]>;
+  def : Pat<(node ty:$val, I64:$addr),
+            (STORE_I64_A64 0, 0, I64:$addr, (i64 (out ty:$val)))>,
+        Requires<[HasAddr64]>;
+}
+
+defm : NarrowingStorePatNoOffset<v8i16, truncstorevi8, store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatNoOffset<v4i32, truncstorevi16,
+                                 store_v4i16_trunc_v4i32>;
+
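These fire on a plain truncating vector store. <8 x i8> is not a legal type,
so its value is promoted to <8 x i16> and the store reaches instruction
selection as a truncstorevi8 node. A sketch of IR that exercises the v8i16
pattern (the function name is illustrative; codegen is checked by the tests
accompanying this patch):

  define void @trunc_store(<8 x i8> %v, <8 x i8>* %p) {
    store <8 x i8> %v, <8 x i8>* %p
    ret void
  }
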
+multiclass NarrowingStorePatImmOff<ValueType ty, PatFrag kind,
+                                   PatFrag operand, OutPatFrag out> {
+  def : Pat<(kind ty:$val, (operand I32:$addr, imm:$off)),
+            (STORE_I64_A32 0, imm:$off, I32:$addr, (i64 (out ty:$val)))>,
+        Requires<[HasAddr32]>;
+  def : Pat<(kind ty:$val, (operand I64:$addr, imm:$off)),
+            (STORE_I64_A64 0, imm:$off, I64:$addr, (i64 (out ty:$val)))>,
+        Requires<[HasAddr64]>;
+}
+
+defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, regPlusImm,
+                               store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, regPlusImm,
+                               store_v4i16_trunc_v4i32>;
+defm : NarrowingStorePatImmOff<v8i16, truncstorevi8, or_is_add,
+                               store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatImmOff<v4i32, truncstorevi16, or_is_add,
+                               store_v4i16_trunc_v4i32>;
+
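regPlusImm folds an ordinary reg+imm address. or_is_add additionally folds the
case where the address computation reaches the backend as an or over provably
disjoint bits, which is equivalent to an add. A sketch that should take the
or_is_add path (function and value names are illustrative):

  define void @store_or_addr(<8 x i8> %v, <8 x i8>* %p) {
    %q = ptrtoint <8 x i8>* %p to i32
    %a = and i32 %q, -16   ; low four bits known zero
    %r = or i32 %a, 8      ; disjoint from the mask above, so acts as an add
    %s = inttoptr i32 %r to <8 x i8>*
    store <8 x i8> %v, <8 x i8>* %s
    ret void
  }
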
+multiclass NarrowingStorePatOffsetOnly<ValueType ty, PatFrag kind,
+                                       OutPatFrag out> {
+  def : Pat<(kind ty:$val, imm:$off),
+            (STORE_I64_A32 0, imm:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
+        Requires<[HasAddr32]>;
+  def : Pat<(kind ty:$val, imm:$off),
+            (STORE_I64_A64 0, imm:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
+        Requires<[HasAddr64]>;
+}
+
+defm : NarrowingStorePatOffsetOnly<v8i16, truncstorevi8,
+                                   store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatOffsetOnly<v4i32, truncstorevi16,
+                                   store_v4i16_trunc_v4i32>;
+
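The offset-only patterns cover stores to constant addresses: the whole address
becomes the store's offset immediate over a constant-zero base. For example
(mirroring the *_to_numeric_address tests below; the function name is
illustrative):

  define void @store_to_32(<8 x i8> %v) {
    %s = inttoptr i32 32 to <8 x i8>*
    store <8 x i8> %v, <8 x i8>* %s   ; i64.store 32 over an i32.const 0 base
    ret void
  }
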
+multiclass NarrowingStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind,
+                                              OutPatFrag out> {
+  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+            (STORE_I64_A32
+              0, tglobaladdr:$off, (CONST_I32 0), (i64 (out ty:$val)))>,
+        Requires<[IsNotPIC, HasAddr32]>;
+  def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+            (STORE_I64_A64
+              0, tglobaladdr:$off, (CONST_I64 0), (i64 (out ty:$val)))>,
+        Requires<[IsNotPIC, HasAddr64]>;
+}
+
+defm : NarrowingStorePatGlobalAddrOffOnly<v8i16, truncstorevi8,
+                                          store_v8i8_trunc_v8i16>;
+defm : NarrowingStorePatGlobalAddrOffOnly<v4i32, truncstorevi16,
+                                          store_v4i16_trunc_v4i32>;
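
Likewise, a store to a global folds the global's address into the offset field
over a zero base. That is only correct when the address is a link-time
constant, hence the IsNotPIC predicate. A sketch with an illustrative global:

  @gv = global <8 x i8> zeroinitializer

  define void @store_to_gv(<8 x i8> %v) {
    store <8 x i8> %v, <8 x i8>* @gv
    ret void
  }
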
// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
ret void
}
+define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16:
+; CHECK: .functype store_narrowing_v8i16 (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+ store <8 x i8> %v, <8 x i8>* %p
+ ret void
+}
+
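Reading the stack code above: local.get 1 pushes the destination address;
i32.const 16711935 (0x00ff00ff) and i32x4.splat build the mask; v128.and
applies it to the value (local.get 0); i8x16.narrow_i16x8_u packs the eight
masked lanes into the low eight bytes, with the unmasked value reused as the
unused second operand; i64x2.extract_lane 0 takes those bytes as an i64; and
i64.store consumes the address and value. The same shape recurs in all the
narrowing tests below, varying only in how the address is formed.
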
define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK: .functype store_v8i16_with_folded_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
+; CHECK: .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 16
+; CHECK-NEXT: # fallthrough-return
+ %q = ptrtoint <8 x i8>* %p to i32
+ %r = add nuw i32 %q, 16
+ %s = inttoptr i32 %r to <8 x i8>*
+ store <8 x i8> %v, <8 x i8>* %s
+ ret void
+}
+
define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
+; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 8
+; CHECK-NEXT: # fallthrough-return
+ %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
+ store <8 x i8> %v, <8 x i8>* %s
+ ret void
+}
+
define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
+; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+ %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
+ store <8 x i8> %v, <8 x i8>* %s
+ ret void
+}
+
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
+; CHECK: .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+ %q = ptrtoint <8 x i8>* %p to i32
+ %r = add nsw i32 %q, 16
+ %s = inttoptr i32 %r to <8 x i8>*
+ store <8 x i8> %v, <8 x i8>* %s
+ ret void
+}
+
define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
+; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+ %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
+ store <8 x i8> %v, <8 x i8>* %s
+ ret void
+}
+
define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK: .functype store_v8i16_to_numeric_address (v128) -> ()
ret void
}
+define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) {
+; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
+; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 32
+; CHECK-NEXT: # fallthrough-return
+ %s = inttoptr i32 32 to <8 x i8>*
+ store <8 x i8> %v, <8 x i8>* %s
+ ret void
+}
+
define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK: .functype store_v8i16_to_global_address (v128) -> ()
ret void
}
+define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
+; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
+; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.const 16711935
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store gv_v8i8
+; CHECK-NEXT: # fallthrough-return
+ store <8 x i8> %v, <8 x i8>* @gv_v8i8
+ ret void
+}
+
; ==============================================================================
; 4 x i32
; ==============================================================================
ret void
}
+define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32:
+; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+ store <4 x i16> %v, <4 x i16>* %p
+ ret void
+}
+
define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_offset:
; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
+; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 16
+; CHECK-NEXT: # fallthrough-return
+ %q = ptrtoint <4 x i16>* %p to i32
+ %r = add nuw i32 %q, 16
+ %s = inttoptr i32 %r to <4 x i16>*
+ store <4 x i16> %v, <4 x i16>* %s
+ ret void
+}
+
define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
+; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 8
+; CHECK-NEXT: # fallthrough-return
+ %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
+ store <4 x i16> %v, <4 x i16>* %s
+ ret void
+}
+
define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
+; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+ %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
+ store <4 x i16> %v, <4 x i16>* %s
+ ret void
+}
+
define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
+; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+ %q = ptrtoint <4 x i16>* %p to i32
+ %r = add nsw i32 %q, 16
+ %s = inttoptr i32 %r to <4 x i16>*
+ store <4 x i16> %v, <4 x i16>* %s
+ ret void
+}
+
define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
ret void
}
+define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
+; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
+; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: # fallthrough-return
+ %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
+ store <4 x i16> %v, <4 x i16>* %s
+ ret void
+}
+
define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_numeric_address:
; CHECK: .functype store_v4i32_to_numeric_address (v128) -> ()
ret void
}
+define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
+; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
+; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store 32
+; CHECK-NEXT: # fallthrough-return
+ %s = inttoptr i32 32 to <4 x i16>*
+ store <4 x i16> %v, <4 x i16>* %s
+ ret void
+}
+
define void @store_v4i32_to_global_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_global_address:
; CHECK: .functype store_v4i32_to_global_address (v128) -> ()
ret void
}
+define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
+; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
+; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i64x2.extract_lane 0
+; CHECK-NEXT: i64.store gv_v4i16
+; CHECK-NEXT: # fallthrough-return
+ store <4 x i16> %v, <4 x i16>* @gv_v4i16
+ ret void
+}
+
; ==============================================================================
; 2 x i64
; ==============================================================================