setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
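+ // PARITY is assumed here to reuse the AdvSIMD CTPOP lowering below, followed
+ // by an AND with 1 (see LowerCTPOP_PARITY).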
+ setOperationAction(ISD::PARITY, MVT::i64, Custom);
+ setOperationAction(ISD::PARITY, MVT::i128, Custom);
+
setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABS, MVT::i64, Custom);
case ISD::SRA_PARTS:
return LowerShiftParts(Op, DAG);
case ISD::CTPOP:
- return LowerCTPOP(Op, DAG);
+ case ISD::PARITY:
+ return LowerCTPOP_PARITY(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
case ISD::OR:
return BitCast(VT, BSP, DAG);
}
-SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
+ SelectionDAG &DAG) const {
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
return SDValue();
+ bool IsParity = Op.getOpcode() == ISD::PARITY;
+
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from the
// AdvSIMD registers are cheap.
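// A sketch of the expected i64 sequence (taken from the CHECK lines in the
// updated tests below; exact register allocation may differ):
//   fmov    d0, x0        // copy the 64-bit integer into a vector register
//   cnt     v0.8b, v0.8b  // per-byte population counts
//   uaddlv  h0, v0.8b     // sum the byte counts
//   fmov    w8, s0        // copy the result back to a GPR
//   and     w0, w8, #0x1  // PARITY only: keep the low bit of the count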
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ if (IsParity)
+ UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
+ DAG.getConstant(1, DL, MVT::i32));
+
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
return UaddLV;
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ if (IsParity)
+ UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
+ DAG.getConstant(1, DL, MVT::i32));
+
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
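+ // PARITY is only registered as Custom for scalar i64/i128 above, so a vector
+ // type reaching this point must come from CTPOP.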
+ assert(!IsParity && "ISD::PARITY of vector types not supported");
+
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
return;
case ISD::CTPOP:
- if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
+ case ISD::PARITY:
+ if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG))
Results.push_back(Result);
return;
case AArch64ISD::SADDV:
define i64 @parity_64(i64 %x) {
; CHECK-LABEL: parity_64:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor x8, x0, x0, lsr #32
-; CHECK-NEXT: eor x8, x8, x8, lsr #16
-; CHECK-NEXT: eor x8, x8, x8, lsr #8
-; CHECK-NEXT: eor x8, x8, x8, lsr #4
-; CHECK-NEXT: eor x8, x8, x8, lsr #2
-; CHECK-NEXT: eor w8, w8, w8, lsr #1
-; CHECK-NEXT: and x0, x8, #0x1
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: cnt v0.8b, v0.8b
+; CHECK-NEXT: uaddlv h0, v0.8b
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i64 @llvm.ctpop.i64(i64 %x)
%2 = and i64 %1, 1
define i128 @parity_128(i128 %x) {
; CHECK-LABEL: parity_128:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor x8, x0, x1
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: eor x8, x8, x8, lsr #32
-; CHECK-NEXT: eor x8, x8, x8, lsr #16
-; CHECK-NEXT: eor x8, x8, x8, lsr #8
-; CHECK-NEXT: eor x8, x8, x8, lsr #4
-; CHECK-NEXT: eor x8, x8, x8, lsr #2
-; CHECK-NEXT: eor w8, w8, w8, lsr #1
-; CHECK-NEXT: and x0, x8, #0x1
+; CHECK-NEXT: cnt v0.16b, v0.16b
+; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i128 @llvm.ctpop.i128(i128 %x)
%2 = and i128 %1, 1
define i32 @parity_64_trunc(i64 %x) {
; CHECK-LABEL: parity_64_trunc:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor x8, x0, x0, lsr #32
-; CHECK-NEXT: eor x8, x8, x8, lsr #16
-; CHECK-NEXT: eor x8, x8, x8, lsr #8
-; CHECK-NEXT: eor x8, x8, x8, lsr #4
-; CHECK-NEXT: eor x8, x8, x8, lsr #2
-; CHECK-NEXT: eor w8, w8, w8, lsr #1
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: cnt v0.8b, v0.8b
+; CHECK-NEXT: uaddlv h0, v0.8b
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call i64 @llvm.ctpop.i64(i64 %x)