TARGET_BUILTIN(__builtin_wasm_avgr_u_i16x8, "V8sV8sV8s", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16cV16cV16cIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_any_true_i8x16, "iV16c", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_any_true_i16x8, "iV8s", "nc", "simd128")
CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()});
return Builder.CreateCall(Callee, Vec);
}
+ case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {
+ Value *Ops[18];
+ size_t OpIdx = 0;
+ Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
+ Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
+ while (OpIdx < 18) {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(OpIdx)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ }
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
+ return Builder.CreateCall(Callee, Ops);
+ }
default:
return nullptr;
}
#define wasm_v8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \
__c7, __c8, __c9, __c10, __c11, __c12, __c13, \
__c14, __c15) \
- ((v128_t)(__builtin_shufflevector( \
- (__u8x16)(__a), (__u8x16)(__b), __c0, __c1, __c2, __c3, __c4, __c5, \
- __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15)))
+ ((v128_t)__builtin_wasm_shuffle_v8x16( \
+ (__i8x16)(__a), (__i8x16)(__b), __c0, __c1, __c2, __c3, __c4, __c5, \
+ __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15))
#define wasm_v16x8_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \
__c7) \
- ((v128_t)(__builtin_shufflevector((__u16x8)(__a), (__u16x8)(__b), __c0, \
- __c1, __c2, __c3, __c4, __c5, __c6, \
- __c7)))
+ ((v128_t)__builtin_wasm_shuffle_v8x16( \
+ (__i8x16)(__a), (__i8x16)(__b), __c0 * 2, __c0 * 2 + 1, __c1 * 2, \
+ __c1 * 2 + 1, __c2 * 2, __c2 * 2 + 1, __c3 * 2, __c3 * 2 + 1, __c4 * 2, \
+ __c4 * 2 + 1, __c5 * 2, __c5 * 2 + 1, __c6 * 2, __c6 * 2 + 1, __c7 * 2, \
+ __c7 * 2 + 1))
#define wasm_v32x4_shuffle(__a, __b, __c0, __c1, __c2, __c3) \
- ((v128_t)(__builtin_shufflevector((__u32x4)(__a), (__u32x4)(__b), __c0, \
- __c1, __c2, __c3)))
+ ((v128_t)__builtin_wasm_shuffle_v8x16( \
+ (__i8x16)(__a), (__i8x16)(__b), __c0 * 4, __c0 * 4 + 1, __c0 * 4 + 2, \
+ __c0 * 4 + 3, __c1 * 4, __c1 * 4 + 1, __c1 * 4 + 2, __c1 * 4 + 3, \
+ __c2 * 4, __c2 * 4 + 1, __c2 * 4 + 2, __c2 * 4 + 3, __c3 * 4, \
+ __c3 * 4 + 1, __c3 * 4 + 2, __c3 * 4 + 3))
#define wasm_v64x2_shuffle(__a, __b, __c0, __c1) \
- ((v128_t)( \
- __builtin_shufflevector((__u64x2)(__a), (__u64x2)(__b), __c0, __c1)))
+ ((v128_t)__builtin_wasm_shuffle_v8x16( \
+ (__i8x16)(__a), (__i8x16)(__b), __c0 * 8, __c0 * 8 + 1, __c0 * 8 + 2, \
+ __c0 * 8 + 3, __c0 * 8 + 4, __c0 * 8 + 5, __c0 * 8 + 6, __c0 * 8 + 7, \
+ __c1 * 8, __c1 * 8 + 1, __c1 * 8 + 2, __c1 * 8 + 3, __c1 * 8 + 4, \
+ __c1 * 8 + 5, __c1 * 8 + 6, __c1 * 8 + 7))
#ifdef __wasm_unimplemented_simd128__
i8x16 swizzle_v8x16(i8x16 x, i8x16 y) {
return __builtin_wasm_swizzle_v8x16(x, y);
// WEBASSEMBLY: call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %x, <16 x i8> %y)
+}
+
+i8x16 shuffle(i8x16 x, i8x16 y) {
+ return __builtin_wasm_shuffle_v8x16(x, y, 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15);
+ // WEBASSEMBLY: call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
+ // WEBASSEMBLY-SAME: i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ // WEBASSEMBLY-SAME: i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
+ // WEBASSEMBLY-SAME: i32 15
// WEBASSEMBLY-NEXT: ret
}
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_shuffle :
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_wasm_sub_saturate_signed :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-
def int_wasm_bitselect :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[llvm_anyvector_ty],
[IntrNoMem, IntrSpeculatable]>;
-
//===----------------------------------------------------------------------===//
// Bulk memory intrinsics
//===----------------------------------------------------------------------===//
Op.getOperand(3) // thrown value
});
}
+
+ case Intrinsic::wasm_shuffle: {
+ // Drop in-chain and replace undefs, but otherwise pass through unchanged
+ SDValue Ops[18];
+ size_t OpIdx = 0;
+ Ops[OpIdx++] = Op.getOperand(1);
+ Ops[OpIdx++] = Op.getOperand(2);
+ while (OpIdx < 18) {
+ const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
+ if (MaskIdx.isUndef() ||
+ cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
+ Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
+ } else {
+ Ops[OpIdx++] = MaskIdx;
+ }
+ }
+ return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
+ }
}
}
ret <16 x i8> %a
}
+; CHECK-LABEL: shuffle_v16i8:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1,
+; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <16 x i8> @llvm.wasm.shuffle(
+ <16 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+ i32, i32, i32, i32, i32)
+define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) {
+ %res = call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
+ i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 35)
+ ret <16 x i8> %res
+}
+
+; CHECK-LABEL: shuffle_undef_v16i8:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1,
+; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) {
+ %res = call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
+ i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 2)
+ ret <16 x i8> %res
+}
+
; ==============================================================================
; 8 x i16
; ==============================================================================