setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::CTPOP, MVT::i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::i64, Custom);
- setOperationAction(ISD::CTPOP, MVT::i128, Custom);
+ if (Subtarget->hasCSSC()) {
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
+ setOperationAction(ISD::PARITY, MVT::i128, Expand);
+ } else {
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i128, Custom);
- setOperationAction(ISD::PARITY, MVT::i64, Custom);
- setOperationAction(ISD::PARITY, MVT::i128, Custom);
+ setOperationAction(ISD::PARITY, MVT::i64, Custom);
+ setOperationAction(ISD::PARITY, MVT::i128, Custom);
+ }
setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABS, MVT::i64, Custom);
return SDValue();
bool IsParity = Op.getOpcode() == ISD::PARITY;
+ SDValue Val = Op.getOperand(0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
- // While there is no integer popcount instruction, it can
+ // For i32, the general parity computation using EORs is more efficient
+ // than going through the floating-point/AdvSIMD sequence below.
+ if (VT == MVT::i32 && IsParity)
+ return SDValue();
+
+ // If there is no CNT instruction available, GPR popcount can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
// the AdvSIMD registers are cheap.
// CNT V0.8B, V0.8B // 8xbyte pop-counts
// ADDV B0, V0.8B // sum 8xbyte pop-counts
// UMOV X0, V0.B[0] // copy byte result back to integer reg
- SDValue Val = Op.getOperand(0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
if (VT == MVT::i32 || VT == MVT::i64) {
if (VT == MVT::i32)
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs">, Requires<[HasCSSC]>;
-defm CNT : OneOperandData<0b000111, "cnt">, Requires<[HasCSSC]>;
+defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz">, Requires<[HasCSSC]>;
defm SMAX : ComparisonOp<0, 0, "smax">, Requires<[HasCSSC]>;
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -mtriple=aarch64-eabi -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s
define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
; CHECK-LABEL: cnt32_advsimd:
; CHECK-NONEON-NEXT: mul w8, w9, w8
; CHECK-NONEON-NEXT: lsr w0, w8, #24
; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cnt32_advsimd:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt w0, w0
+; CHECK-CSSC-NEXT: ret
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
}
; CHECK-NONEON-NEXT: mul w8, w9, w8
; CHECK-NONEON-NEXT: lsr w0, w8, #24
; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cnt32_advsimd_2:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-CSSC-NEXT: fmov w8, s0
+; CHECK-CSSC-NEXT: cnt w0, w8
+; CHECK-CSSC-NEXT: ret
%1 = extractelement <2 x i32> %x, i64 0
%2 = tail call i32 @llvm.ctpop.i32(i32 %1)
ret i32 %2
; CHECK-NONEON-NEXT: mul x8, x9, x8
; CHECK-NONEON-NEXT: lsr x0, x8, #56
; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cnt64_advsimd:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt x0, x0
+; CHECK-CSSC-NEXT: ret
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %cnt
}
; CHECK-NONEON-NEXT: mul w8, w9, w8
; CHECK-NONEON-NEXT: lsr w0, w8, #24
; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cnt32:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt w0, w0
+; CHECK-CSSC-NEXT: ret
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
}
; CHECK-NONEON-NEXT: mul x8, x9, x8
; CHECK-NONEON-NEXT: lsr x0, x8, #56
; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cnt64:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt x0, x0
+; CHECK-CSSC-NEXT: ret
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %cnt
}
; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
; CHECK-NONEON-NEXT: cset w0, ne
; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: ctpop_eq_one:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt x8, x0
+; CHECK-CSSC-NEXT: cmp x8, #1
+; CHECK-CSSC-NEXT: cset w0, eq
+; CHECK-CSSC-NEXT: ret
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cmp = icmp eq i64 %count, 1
%conv = zext i1 %cmp to i32
; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
; CHECK-NONEON-NEXT: cset w0, eq
; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: ctpop_ne_one:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt x8, x0
+; CHECK-CSSC-NEXT: cmp x8, #1
+; CHECK-CSSC-NEXT: cset w0, ne
+; CHECK-CSSC-NEXT: ret
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cmp = icmp ne i64 %count, 1
%conv = zext i1 %cmp to i32
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon -mattr=+cssc < %s | FileCheck %s -check-prefix=CHECK-CSSC
declare i128 @llvm.ctpop.i128(i128)
; CHECK-NEXT: lsr x9, x9, #56
; CHECK-NEXT: add x0, x9, x8, lsr #56
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: ctpop_i128:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt x8, x1
+; CHECK-CSSC-NEXT: cnt x9, x0
+; CHECK-CSSC-NEXT: add x0, x9, x8
+; CHECK-CSSC-NEXT: mov x1, xzr
+; CHECK-CSSC-NEXT: ret
%c = call i128 @llvm.ctpop.i128(i128 %i)
ret i128 %c
}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mattr=+cssc | FileCheck %s -check-prefix=CHECK-CSSC
define i4 @parity_4(i4 %x) {
; CHECK-LABEL: parity_4:
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_4:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: and w8, w0, #0xf
+; CHECK-CSSC-NEXT: cnt w8, w8
+; CHECK-CSSC-NEXT: and w0, w8, #0x1
+; CHECK-CSSC-NEXT: ret
%1 = tail call i4 @llvm.ctpop.i4(i4 %x)
%2 = and i4 %1, 1
ret i4 %2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_8:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: and w8, w0, #0xff
+; CHECK-CSSC-NEXT: cnt w8, w8
+; CHECK-CSSC-NEXT: and w0, w8, #0x1
+; CHECK-CSSC-NEXT: ret
%1 = tail call i8 @llvm.ctpop.i8(i8 %x)
%2 = and i8 %1, 1
ret i8 %2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_16:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: and w8, w0, #0xffff
+; CHECK-CSSC-NEXT: cnt w8, w8
+; CHECK-CSSC-NEXT: and w0, w8, #0x1
+; CHECK-CSSC-NEXT: ret
%1 = tail call i16 @llvm.ctpop.i16(i16 %x)
%2 = and i16 %1, 1
ret i16 %2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_17:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: and w8, w0, #0x1ffff
+; CHECK-CSSC-NEXT: cnt w8, w8
+; CHECK-CSSC-NEXT: and w0, w8, #0x1
+; CHECK-CSSC-NEXT: ret
%1 = tail call i17 @llvm.ctpop.i17(i17 %x)
%2 = and i17 %1, 1
ret i17 %2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_32:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt w8, w0
+; CHECK-CSSC-NEXT: and w0, w8, #0x1
+; CHECK-CSSC-NEXT: ret
%1 = tail call i32 @llvm.ctpop.i32(i32 %x)
%2 = and i32 %1, 1
ret i32 %2
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_64:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt x8, x0
+; CHECK-CSSC-NEXT: and x0, x8, #0x1
+; CHECK-CSSC-NEXT: ret
%1 = tail call i64 @llvm.ctpop.i64(i64 %x)
%2 = and i64 %1, 1
ret i64 %2
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_128:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: eor x8, x0, x1
+; CHECK-CSSC-NEXT: mov x1, xzr
+; CHECK-CSSC-NEXT: cnt x8, x8
+; CHECK-CSSC-NEXT: and x0, x8, #0x1
+; CHECK-CSSC-NEXT: ret
%1 = tail call i128 @llvm.ctpop.i128(i128 %x)
%2 = and i128 %1, 1
ret i128 %2
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_64_trunc:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt x8, x0
+; CHECK-CSSC-NEXT: and w0, w8, #0x1
+; CHECK-CSSC-NEXT: ret
%1 = tail call i64 @llvm.ctpop.i64(i64 %x)
%2 = trunc i64 %1 to i32
%3 = and i32 %2, 1
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_32_trunc:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt w8, w0
+; CHECK-CSSC-NEXT: and w0, w8, #0x1
+; CHECK-CSSC-NEXT: ret
%1 = tail call i32 @llvm.ctpop.i32(i32 %x)
%2 = trunc i32 %1 to i8
%3 = and i8 %2, 1
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_8_zext:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: and w8, w0, #0xff
+; CHECK-CSSC-NEXT: cnt w8, w8
+; CHECK-CSSC-NEXT: and w0, w8, #0x1
+; CHECK-CSSC-NEXT: ret
%a = zext i8 %x to i32
%b = tail call i32 @llvm.ctpop.i32(i32 %a)
%c = and i32 %b, 1
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: parity_8_mask:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: and w8, w0, #0xff
+; CHECK-CSSC-NEXT: cnt w8, w8
+; CHECK-CSSC-NEXT: and w0, w8, #0x1
+; CHECK-CSSC-NEXT: ret
%a = and i32 %x, 255
%b = tail call i32 @llvm.ctpop.i32(i32 %a)
%c = and i32 %b, 1