LLVMMatchType<0>],
[IntrNoMem]>;
+ class AdvSIMD_SVE_PUNPKHI_Intrinsic
+ : Intrinsic<[LLVMHalfElementsVectorType<0>],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+
// This class of intrinsics are not intended to be useful within LLVM IR but
// are instead here to support some of the more regid parts of the ACLE.
class Builtin_SVCVT<string name, LLVMType OUT, LLVMType IN>
//
def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
+
+//
+// Predicate operations
+//
+
+def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic;
+def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic;
}
}
case IITDescriptor::HalfVecArgument:
// If this is a forward reference, defer the check for later.
- return D.getArgumentNumber() >= ArgTys.size() ||
- !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
+ if (D.getArgumentNumber() >= ArgTys.size())
+ return IsDeferredCheck || DeferCheck(Ty);
+ return !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
VectorType::getHalfElementsVectorType(
cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
case IITDescriptor::SameVecWidthArgument: {
defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">;
defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">;
- def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">;
- def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">;
+ defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>;
+ defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>;
defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
// SVE pattern match helpers.
//===----------------------------------------------------------------------===//
+class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ Instruction inst>
+: Pat<(vtd (op vt1:$Op1)),
+ (inst $Op1)>;
+
class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
let Inst{3-0} = Pd;
}
+multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {
+ def NAME : sve_int_perm_punpk<opc, asm>;
+
+ def : SVE_1_Op_Pat<nxv8i1, op, nxv16i1, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Pat<nxv4i1, op, nxv8i1, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Pat<nxv2i1, op, nxv4i1, !cast<Instruction>(NAME)>;
+}
+
class sve_int_rdffr_pred<bit s, string asm>
: I<(outs PPR8:$Pd), (ins PPRAny:$Pg),
asm, "\t$Pd, $Pg/z",
--- /dev/null
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; PUNPKHI
+;
+
+define <vscale x 8 x i1> @punpkhi_b16(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: punpkhi_b16
+; CHECK: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i1> @llvm.aarch64.sve.punpkhi.nxv8i1(<vscale x 16 x i1> %a)
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @punpkhi_b8(<vscale x 8 x i1> %a) {
+; CHECK-LABEL: punpkhi_b8
+; CHECK: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.aarch64.sve.punpkhi.nxv4i1(<vscale x 8 x i1> %a)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @punpkhi_b4(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: punpkhi_b4
+; CHECK: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.aarch64.sve.punpkhi.nxv2i1(<vscale x 4 x i1> %a)
+ ret <vscale x 2 x i1> %res
+}
+
+;
+; PUNPKLO
+;
+
+define <vscale x 8 x i1> @punpklo_b16(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: punpklo_b16
+; CHECK: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i1> @llvm.aarch64.sve.punpklo.nxv8i1(<vscale x 16 x i1> %a)
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @punpklo_b8(<vscale x 8 x i1> %a) {
+; CHECK-LABEL: punpklo_b8
+; CHECK: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.aarch64.sve.punpklo.nxv4i1(<vscale x 8 x i1> %a)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @punpklo_b4(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: punpklo_b4
+; CHECK: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.aarch64.sve.punpklo.nxv2i1(<vscale x 4 x i1> %a)
+ ret <vscale x 2 x i1> %res
+}
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.punpkhi.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.punpkhi.nxv4i1(<vscale x 8 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.punpkhi.nxv2i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.punpklo.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.punpklo.nxv4i1(<vscale x 8 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.punpklo.nxv2i1(<vscale x 4 x i1>)