+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -mtriple=aarch64-eabi -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
; Population count of an i32 argument via llvm.ctpop.i32.
; With NEON, the expected code moves the value into d0, counts bits per byte
; with cnt.8b and sums the byte counts with uaddlv.8b.
; Without NEON, the expected code is the scalar shift/mask/multiply expansion
; (masks 0x55555555, 0x33333333, 0xf0f0f0f; multiplier 16843009; lsr #24).
define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
+; CHECK-LABEL: cnt32_advsimd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: cnt.8b v0, v0
+; CHECK-NEXT: uaddlv.8b h0, v0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: cnt32_advsimd:
+; CHECK-NONEON: // %bb.0:
+; CHECK-NONEON-NEXT: lsr w8, w0, #1
+; CHECK-NONEON-NEXT: and w8, w8, #0x55555555
+; CHECK-NONEON-NEXT: sub w8, w0, w8
+; CHECK-NONEON-NEXT: and w9, w8, #0x33333333
+; CHECK-NONEON-NEXT: lsr w8, w8, #2
+; CHECK-NONEON-NEXT: and w8, w8, #0x33333333
+; CHECK-NONEON-NEXT: add w8, w9, w8
+; CHECK-NONEON-NEXT: add w8, w8, w8, lsr #4
+; CHECK-NONEON-NEXT: and w8, w8, #0xf0f0f0f
+; CHECK-NONEON-NEXT: mov w9, #16843009
+; CHECK-NONEON-NEXT: mul w8, w8, w9
+; CHECK-NONEON-NEXT: lsr w0, w8, #24
+; CHECK-NONEON-NEXT: ret
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
-; CHECK: mov w[[IN64:[0-9]+]], w0
-; CHECK: fmov d0, x[[IN64]]
-; CHECK: cnt.8b v0, v0
-; CHECK: uaddlv.8b h0, v0
-; CHECK: fmov w0, s0
-; CHECK: ret
-; CHECK-NONEON-LABEL: cnt32_advsimd
-; CHECK-NONEON-NOT: 8b
-; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x55555555
-; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x33333333
-; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0xf0f0f0f
-; CHECK-NONEON: mul
}
; Population count of element 0 extracted from a <2 x i32> argument.
; With NEON, the expected code moves the lane to w0 and back into d0
; (fmov w0, s0 / fmov d0, x0) before the cnt.8b + uaddlv.8b sequence.
; Without NEON, the expected code is the scalar shift/mask/multiply
; expansion operating on w0.
define i32 @cnt32_advsimd_2(<2 x i32> %x) {
+; CHECK-LABEL: cnt32_advsimd_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: cnt.8b v0, v0
+; CHECK-NEXT: uaddlv.8b h0, v0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: cnt32_advsimd_2:
+; CHECK-NONEON: // %bb.0:
+; CHECK-NONEON-NEXT: lsr w8, w0, #1
+; CHECK-NONEON-NEXT: and w8, w8, #0x55555555
+; CHECK-NONEON-NEXT: sub w8, w0, w8
+; CHECK-NONEON-NEXT: and w9, w8, #0x33333333
+; CHECK-NONEON-NEXT: lsr w8, w8, #2
+; CHECK-NONEON-NEXT: and w8, w8, #0x33333333
+; CHECK-NONEON-NEXT: add w8, w9, w8
+; CHECK-NONEON-NEXT: add w8, w8, w8, lsr #4
+; CHECK-NONEON-NEXT: and w8, w8, #0xf0f0f0f
+; CHECK-NONEON-NEXT: mov w9, #16843009
+; CHECK-NONEON-NEXT: mul w8, w8, w9
+; CHECK-NONEON-NEXT: lsr w0, w8, #24
+; CHECK-NONEON-NEXT: ret
%1 = extractelement <2 x i32> %x, i64 0
%2 = tail call i32 @llvm.ctpop.i32(i32 %1)
ret i32 %2
-; CHECK: fmov w0, s0
-; CHECK: fmov d0, x0
-; CHECK: cnt.8b v0, v0
-; CHECK: uaddlv.8b h0, v0
-; CHECK: fmov w0, s0
-; CHECK: ret
-; CHECK-NONEON-LABEL: cnt32_advsimd_2
-; CHECK-NONEON-NOT: 8b
-; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x55555555
-; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x33333333
-; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0xf0f0f0f
-; CHECK-NONEON: mul
}
; Population count of an i64 argument via llvm.ctpop.i64.
; With NEON, the expected code is fmov d0, x0 followed by cnt.8b and uaddlv.8b.
; Without NEON, the expected code is the 64-bit scalar shift/mask/multiply
; expansion (multiplier 72340172838076673, final lsr #56).
define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
+; CHECK-LABEL: cnt64_advsimd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: cnt.8b v0, v0
+; CHECK-NEXT: uaddlv.8b h0, v0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: cnt64_advsimd:
+; CHECK-NONEON: // %bb.0:
+; CHECK-NONEON-NEXT: lsr x8, x0, #1
+; CHECK-NONEON-NEXT: and x8, x8, #0x5555555555555555
+; CHECK-NONEON-NEXT: sub x8, x0, x8
+; CHECK-NONEON-NEXT: and x9, x8, #0x3333333333333333
+; CHECK-NONEON-NEXT: lsr x8, x8, #2
+; CHECK-NONEON-NEXT: and x8, x8, #0x3333333333333333
+; CHECK-NONEON-NEXT: add x8, x9, x8
+; CHECK-NONEON-NEXT: add x8, x8, x8, lsr #4
+; CHECK-NONEON-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f
+; CHECK-NONEON-NEXT: mov x9, #72340172838076673
+; CHECK-NONEON-NEXT: mul x8, x8, x9
+; CHECK-NONEON-NEXT: lsr x0, x8, #56
+; CHECK-NONEON-NEXT: ret
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %cnt
-; CHECK: fmov d0, x0
-; CHECK: cnt.8b v0, v0
-; CHECK: uaddlv.8b h0, v0
-; CHECK: fmov w0, s0
-; CHECK: ret
-; CHECK-NONEON-LABEL: cnt64_advsimd
-; CHECK-NONEON-NOT: 8b
-; CHECK-NONEON: and x{{[0-9]+}}, x{{[0-9]+}}, #0x5555555555555555
-; CHECK-NONEON: and x{{[0-9]+}}, x{{[0-9]+}}, #0x3333333333333333
-; CHECK-NONEON: and x{{[0-9]+}}, x{{[0-9]+}}, #0xf0f0f0f0f0f0f0f
-; CHECK-NONEON: mul
}
; Do not use AdvSIMD when -mno-implicit-float is specified.
; rdar://9473858
; cnt32 carries the noimplicitfloat attribute, so the expected output in both
; configurations is the 32-bit scalar shift/mask/multiply popcount expansion,
; with no vector instructions.
define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
+; CHECK-LABEL: cnt32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsr w8, w0, #1
+; CHECK-NEXT: and w8, w8, #0x55555555
+; CHECK-NEXT: sub w8, w0, w8
+; CHECK-NEXT: and w9, w8, #0x33333333
+; CHECK-NEXT: lsr w8, w8, #2
+; CHECK-NEXT: and w8, w8, #0x33333333
+; CHECK-NEXT: add w8, w9, w8
+; CHECK-NEXT: add w8, w8, w8, lsr #4
+; CHECK-NEXT: and w8, w8, #0xf0f0f0f
+; CHECK-NEXT: mov w9, #16843009
+; CHECK-NEXT: mul w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, #24
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: cnt32:
+; CHECK-NONEON: // %bb.0:
+; CHECK-NONEON-NEXT: lsr w8, w0, #1
+; CHECK-NONEON-NEXT: and w8, w8, #0x55555555
+; CHECK-NONEON-NEXT: sub w8, w0, w8
+; CHECK-NONEON-NEXT: and w9, w8, #0x33333333
+; CHECK-NONEON-NEXT: lsr w8, w8, #2
+; CHECK-NONEON-NEXT: and w8, w8, #0x33333333
+; CHECK-NONEON-NEXT: add w8, w9, w8
+; CHECK-NONEON-NEXT: add w8, w8, w8, lsr #4
+; CHECK-NONEON-NEXT: and w8, w8, #0xf0f0f0f
+; CHECK-NONEON-NEXT: mov w9, #16843009
+; CHECK-NONEON-NEXT: mul w8, w8, w9
+; CHECK-NONEON-NEXT: lsr w0, w8, #24
+; CHECK-NONEON-NEXT: ret
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
-; CHECK-LABEL: cnt32:
-; CHECK-NOT: 16b
-; CHECK: ret
}
; 64-bit counterpart of cnt32: the function is marked noimplicitfloat, and the
; expected output in both configurations is the 64-bit scalar
; shift/mask/multiply popcount expansion, with no vector instructions.
define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
+; CHECK-LABEL: cnt64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsr x8, x0, #1
+; CHECK-NEXT: and x8, x8, #0x5555555555555555
+; CHECK-NEXT: sub x8, x0, x8
+; CHECK-NEXT: and x9, x8, #0x3333333333333333
+; CHECK-NEXT: lsr x8, x8, #2
+; CHECK-NEXT: and x8, x8, #0x3333333333333333
+; CHECK-NEXT: add x8, x9, x8
+; CHECK-NEXT: add x8, x8, x8, lsr #4
+; CHECK-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f
+; CHECK-NEXT: mov x9, #72340172838076673
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, #56
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: cnt64:
+; CHECK-NONEON: // %bb.0:
+; CHECK-NONEON-NEXT: lsr x8, x0, #1
+; CHECK-NONEON-NEXT: and x8, x8, #0x5555555555555555
+; CHECK-NONEON-NEXT: sub x8, x0, x8
+; CHECK-NONEON-NEXT: and x9, x8, #0x3333333333333333
+; CHECK-NONEON-NEXT: lsr x8, x8, #2
+; CHECK-NONEON-NEXT: and x8, x8, #0x3333333333333333
+; CHECK-NONEON-NEXT: add x8, x9, x8
+; CHECK-NONEON-NEXT: add x8, x8, x8, lsr #4
+; CHECK-NONEON-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f
+; CHECK-NONEON-NEXT: mov x9, #72340172838076673
+; CHECK-NONEON-NEXT: mul x8, x8, x9
+; CHECK-NONEON-NEXT: lsr x0, x8, #56
+; CHECK-NONEON-NEXT: ret
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %cnt
-; CHECK-LABEL: cnt64:
-; CHECK-NOT: 16b
-; CHECK: ret
}
; Returns (as i32) whether the i64 argument has exactly one bit set, written
; as ctpop(x) == 1.
; In both configurations the expected code computes the full population count
; (vector cnt/uaddlv with NEON, scalar expansion without) and then compares it
; with 1 using cmp + cset.
+define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
+; CHECK-LABEL: ctpop_eq_one:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: cnt.8b v0, v0
+; CHECK-NEXT: uaddlv.8b h0, v0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: cmp x8, #1 // =1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: ctpop_eq_one:
+; CHECK-NONEON: // %bb.0:
+; CHECK-NONEON-NEXT: lsr x8, x0, #1
+; CHECK-NONEON-NEXT: and x8, x8, #0x5555555555555555
+; CHECK-NONEON-NEXT: sub x8, x0, x8
+; CHECK-NONEON-NEXT: and x9, x8, #0x3333333333333333
+; CHECK-NONEON-NEXT: lsr x8, x8, #2
+; CHECK-NONEON-NEXT: and x8, x8, #0x3333333333333333
+; CHECK-NONEON-NEXT: add x8, x9, x8
+; CHECK-NONEON-NEXT: add x8, x8, x8, lsr #4
+; CHECK-NONEON-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f
+; CHECK-NONEON-NEXT: mov x9, #72340172838076673
+; CHECK-NONEON-NEXT: mul x8, x8, x9
+; CHECK-NONEON-NEXT: lsr x8, x8, #56
+; CHECK-NONEON-NEXT: cmp x8, #1 // =1
+; CHECK-NONEON-NEXT: cset w0, eq
+; CHECK-NONEON-NEXT: ret
+ %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+ %cmp = icmp eq i64 %count, 1
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s -check-prefixes=CHECK,POPCOUNT
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=-popcnt | FileCheck %s -check-prefixes=CHECK,NO-POPCOUNT
declare i8 @llvm.ctpop.i8(i8) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
}
; Truncates the i64 population count to i6 and compares it with 2 using an
; unsigned less-than.
; With popcnt, the expected code is popcntq followed by andb $63, cmpb $2 and
; setb. Without popcnt, the count is first expanded to the scalar
; shift/mask/imul sequence and then goes through the same mask and compare.
define i32 @test3(i64 %x) nounwind readnone {
-; CHECK-LABEL: test3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: popcntq %rdi, %rcx
-; CHECK-NEXT: andb $63, %cl
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpb $2, %cl
-; CHECK-NEXT: setb %al
-; CHECK-NEXT: retq
+; POPCOUNT-LABEL: test3:
+; POPCOUNT: # %bb.0:
+; POPCOUNT-NEXT: popcntq %rdi, %rcx
+; POPCOUNT-NEXT: andb $63, %cl
+; POPCOUNT-NEXT: xorl %eax, %eax
+; POPCOUNT-NEXT: cmpb $2, %cl
+; POPCOUNT-NEXT: setb %al
+; POPCOUNT-NEXT: retq
+;
+; NO-POPCOUNT-LABEL: test3:
+; NO-POPCOUNT: # %bb.0:
+; NO-POPCOUNT-NEXT: movq %rdi, %rax
+; NO-POPCOUNT-NEXT: shrq %rax
+; NO-POPCOUNT-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; NO-POPCOUNT-NEXT: andq %rax, %rcx
+; NO-POPCOUNT-NEXT: subq %rcx, %rdi
+; NO-POPCOUNT-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; NO-POPCOUNT-NEXT: movq %rdi, %rcx
+; NO-POPCOUNT-NEXT: andq %rax, %rcx
+; NO-POPCOUNT-NEXT: shrq $2, %rdi
+; NO-POPCOUNT-NEXT: andq %rax, %rdi
+; NO-POPCOUNT-NEXT: addq %rcx, %rdi
+; NO-POPCOUNT-NEXT: movq %rdi, %rax
+; NO-POPCOUNT-NEXT: shrq $4, %rax
+; NO-POPCOUNT-NEXT: addq %rdi, %rax
+; NO-POPCOUNT-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; NO-POPCOUNT-NEXT: andq %rax, %rcx
+; NO-POPCOUNT-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101
+; NO-POPCOUNT-NEXT: imulq %rcx, %rdx
+; NO-POPCOUNT-NEXT: shrq $56, %rdx
+; NO-POPCOUNT-NEXT: andb $63, %dl
+; NO-POPCOUNT-NEXT: xorl %eax, %eax
+; NO-POPCOUNT-NEXT: cmpb $2, %dl
+; NO-POPCOUNT-NEXT: setb %al
+; NO-POPCOUNT-NEXT: retq
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cast = trunc i64 %count to i6 ; Too small for 0-64
%cmp = icmp ult i6 %cast, 2
}
; Population count (llvm.ctpop.i8) of an input masked to 7 bits, with the
; result then masked by 7.
; A 7-bit value has at most 7 bits set, so the count already fits in 3 bits.
; The expected popcnt code contains no instruction for the final `and 7`
; (presumably folded away for that reason).
; Without popcnt, the expected code is the byte-sized shift/mask expansion.
define i8 @test4(i8 %x) nounwind readnone {
-; CHECK-LABEL: test4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: andl $127, %edi
-; CHECK-NEXT: popcntl %edi, %eax
-; CHECK-NEXT: # kill: def $al killed $al killed $eax
-; CHECK-NEXT: retq
+; POPCOUNT-LABEL: test4:
+; POPCOUNT: # %bb.0:
+; POPCOUNT-NEXT: andl $127, %edi
+; POPCOUNT-NEXT: popcntl %edi, %eax
+; POPCOUNT-NEXT: # kill: def $al killed $al killed $eax
+; POPCOUNT-NEXT: retq
+;
+; NO-POPCOUNT-LABEL: test4:
+; NO-POPCOUNT: # %bb.0:
+; NO-POPCOUNT-NEXT: # kill: def $edi killed $edi def $rdi
+; NO-POPCOUNT-NEXT: andb $127, %dil
+; NO-POPCOUNT-NEXT: movl %edi, %eax
+; NO-POPCOUNT-NEXT: shrb %al
+; NO-POPCOUNT-NEXT: andb $21, %al
+; NO-POPCOUNT-NEXT: subb %al, %dil
+; NO-POPCOUNT-NEXT: movl %edi, %eax
+; NO-POPCOUNT-NEXT: andb $51, %al
+; NO-POPCOUNT-NEXT: shrb $2, %dil
+; NO-POPCOUNT-NEXT: andb $51, %dil
+; NO-POPCOUNT-NEXT: addb %al, %dil
+; NO-POPCOUNT-NEXT: movl %edi, %eax
+; NO-POPCOUNT-NEXT: shrb $4, %al
+; NO-POPCOUNT-NEXT: addl %edi, %eax
+; NO-POPCOUNT-NEXT: andb $15, %al
+; NO-POPCOUNT-NEXT: # kill: def $al killed $al killed $eax
+; NO-POPCOUNT-NEXT: retq
%x2 = and i8 %x, 127
%count = tail call i8 @llvm.ctpop.i8(i8 %x2)
%and = and i8 %count, 7
ret i8 %and
}
+
; Returns (as i32) whether the i64 argument has exactly one bit set, written
; as ctpop(x) == 1.
; With popcnt, the expected code is popcntq followed by cmpq $1 and sete.
; Without popcnt, the full scalar shift/mask/imul popcount expansion is
; expected before the same compare.
+define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
+; POPCOUNT-LABEL: ctpop_eq_one:
+; POPCOUNT: # %bb.0:
+; POPCOUNT-NEXT: popcntq %rdi, %rcx
+; POPCOUNT-NEXT: xorl %eax, %eax
+; POPCOUNT-NEXT: cmpq $1, %rcx
+; POPCOUNT-NEXT: sete %al
+; POPCOUNT-NEXT: retq
+;
+; NO-POPCOUNT-LABEL: ctpop_eq_one:
+; NO-POPCOUNT: # %bb.0:
+; NO-POPCOUNT-NEXT: movq %rdi, %rax
+; NO-POPCOUNT-NEXT: shrq %rax
+; NO-POPCOUNT-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; NO-POPCOUNT-NEXT: andq %rax, %rcx
+; NO-POPCOUNT-NEXT: subq %rcx, %rdi
+; NO-POPCOUNT-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; NO-POPCOUNT-NEXT: movq %rdi, %rcx
+; NO-POPCOUNT-NEXT: andq %rax, %rcx
+; NO-POPCOUNT-NEXT: shrq $2, %rdi
+; NO-POPCOUNT-NEXT: andq %rax, %rdi
+; NO-POPCOUNT-NEXT: addq %rcx, %rdi
+; NO-POPCOUNT-NEXT: movq %rdi, %rax
+; NO-POPCOUNT-NEXT: shrq $4, %rax
+; NO-POPCOUNT-NEXT: addq %rdi, %rax
+; NO-POPCOUNT-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; NO-POPCOUNT-NEXT: andq %rax, %rcx
+; NO-POPCOUNT-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101
+; NO-POPCOUNT-NEXT: imulq %rcx, %rdx
+; NO-POPCOUNT-NEXT: shrq $56, %rdx
+; NO-POPCOUNT-NEXT: xorl %eax, %eax
+; NO-POPCOUNT-NEXT: cmpq $1, %rdx
+; NO-POPCOUNT-NEXT: sete %al
+; NO-POPCOUNT-NEXT: retq
+ %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+ %cmp = icmp eq i64 %count, 1
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}