(set_attr ("prefix") ("evex"))
(set_attr ("mode") ("TI"))])
+(define_expand "popcount<mode>2"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand")
+ (popcount:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
+ "TARGET_AVX512VPOPCNTDQ")
+
(define_insn "vpopcount<mode><mask_name>"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(popcount:VI48_AVX512VL
"TARGET_SSE && TARGET_64BIT"
"jmp\t%P1")
+(define_expand "popcount<mode>2"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ (popcount:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512BITALG")
+
(define_insn "vpopcount<mode><mask_name>"
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
(popcount:VI12_AVX512VL
--- /dev/null
+/* PR target/97770 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512vl -mprefer-vector-width=512" } */
+/* Add xfail since no IFN for QI/HImode popcount */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*xmm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*ymm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\\n\\r\]*zmm" 1 {xfail *-*-*} } } */
+
+#include <immintrin.h>
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountb_128 (char * __restrict dest, char* src)
+{
+ for (int i = 0; i != 16; i++)
+ dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountw_128 (short* __restrict dest, short* src)
+{
+ for (int i = 0; i != 8; i++)
+ dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountb_256 (char * __restrict dest, char* src)
+{
+ for (int i = 0; i != 32; i++)
+ dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountw_256 (short* __restrict dest, short* src)
+{
+ for (int i = 0; i != 16; i++)
+ dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountb_512 (char * __restrict dest, char* src)
+{
+ for (int i = 0; i != 64; i++)
+ dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountw_512 (short* __restrict dest, short* src)
+{
+ for (int i = 0; i != 32; i++)
+ dest[i] = __builtin_popcount (src[i]);
+}
--- /dev/null
+/* PR target/97770 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\\n\\r\]*zmm" 1 } } */
+/* Add xfail since current vectorizor cannot generate expected code for DImode popcount */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*xmm" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*ymm" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\\n\\r\]*zmm" 1 { xfail *-*-* } } } */
+#ifndef AVX512VPOPCNTQ_H_INCLUDED
+#define AVX512VPOPCNTQ_H_INCLUDED
+
+#include <immintrin.h>
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountd_128 (int* __restrict dest, int* src)
+{
+ for (int i = 0; i != 4; i++)
+ dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountq_128 (long long* __restrict dest, long long* src)
+{
+ for (int i = 0; i != 2; i++)
+ dest[i] = __builtin_popcountll (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountd_256 (int* __restrict dest, int* src)
+{
+ for (int i = 0; i != 8; i++)
+ dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountq_256 (long long* __restrict dest, long long* src)
+{
+ for (int i = 0; i != 4; i++)
+ dest[i] = __builtin_popcountll (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountd_512 (int* __restrict dest, int* src)
+{
+ for (int i = 0; i != 16; i++)
+ dest[i] = __builtin_popcount (src[i]);
+}
+
+void
+__attribute__ ((noipa, optimize("-O3")))
+popcountq_512 (long long* __restrict dest, long long* src)
+{
+ for (int i = 0; i != 8; i++)
+ dest[i] = __builtin_popcountll (src[i]);
+}
+#endif
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vpopcntdq" } */
+
+#define AVX512VPOPCNTDQ
+
+#include "avx512f-helper.h"
+#include "avx512vpopcntdq-pr97770-1.c"
+
+#define SIZE_D AVX512F_LEN / 32
+#define SIZE_Q AVX512F_LEN / 64
+
+
+#define RTEST(TYPE, LEN, SIZE, MODE) \
+ do \
+ { \
+ TYPE res[SIZE], src[SIZE], res_ref[SIZE], v; \
+ int i, j, ret; \
+ for (i = 0; i < SIZE; i++) \
+ { \
+ v = src[i] = i * 2 + 3; \
+ ret = 0; \
+ for (j = 0; j < sizeof(v) * 8; j++) \
+ if ((v & ((TYPE)1 << (TYPE) j))) \
+ ret++; \
+ res_ref[i] = ret; \
+ } \
+ EVAL(popcount, MODE, LEN) (res, src); \
+ for (i = 0; i < SIZE; i++) \
+ if (res[i] != res_ref[i]) \
+ abort (); \
+ } \
+ while (0)
+
+void
+TEST (void)
+{
+ RTEST (long long, AVX512F_LEN, SIZE_Q, q_);
+ RTEST (int, AVX512F_LEN, SIZE_D, d_);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx512vpopcntdq -mavx512vl" } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-pr97770-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-pr97770-2.c"