From d4b2b5f17ce7b9fde637eec86bdf035b7b8abb22 Mon Sep 17 00:00:00 2001 From: Jin Kyu Song Date: Fri, 13 Sep 2013 14:12:55 -0700 Subject: [PATCH] AVX-512: Add AVX-512CD instructions Added Conflict Detection (AVX-512CD) instructions. These instructions are supported if CPUID.(EAX=07H, ECX=0):EBX.AVX512CD[bit 28] = 1. Signed-off-by: Jin Kyu Song Signed-off-by: Cyrill Gorcunov --- insns.dat | 10 ++++++ insns.h | 1 + test/avx512cd.asm | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 test/avx512cd.asm diff --git a/insns.dat b/insns.dat index ad72d61..3c59da2 100644 --- a/insns.dat +++ b/insns.dat @@ -4054,6 +4054,16 @@ KUNPCKBW kreg,kreg,kreg [rvm: vex.nds.l1.66.0f.w0 4b /r ] AVX512,FUTURE KXNORW kreg,kreg,kreg [rvm: vex.nds.l1.0f.w0 46 /r ] AVX512,FUTURE KXORW kreg,kreg,kreg [rvm: vex.nds.l1.0f.w0 47 /r ] AVX512,FUTURE +; AVX-512CD (Conflict Detection) instructions +VPBROADCASTMB2Q zmmreg,kreg [rm: evex.512.f3.0f38.w1 2a /r ] AVX512CD,FUTURE +VPBROADCASTMW2D zmmreg,kreg [rm: evex.512.f3.0f38.w0 3a /r ] AVX512CD,FUTURE +VPCONFLICTD zmmreg|mask|z,zmmrm512|b32 [rm:fv: evex.512.66.0f38.w0 c4 /r ] AVX512CD,FUTURE +VPCONFLICTQ zmmreg|mask|z,zmmrm512|b64 [rm:fv: evex.512.66.0f38.w1 c4 /r ] AVX512CD,FUTURE +VPLZCNTD zmmreg|mask|z,zmmrm512|b32 [rm:fv: evex.512.66.0f38.w0 44 /r ] AVX512CD,FUTURE +VPLZCNTQ zmmreg|mask|z,zmmrm512|b64 [rm:fv: evex.512.66.0f38.w1 44 /r ] AVX512CD,FUTURE +VPTESTNMD kreg|mask,zmmreg,zmmrm512|b32 [rvm:fv: evex.nds.512.f3.0f38.w0 27 /r ] AVX512CD,FUTURE +VPTESTNMQ kreg|mask,zmmreg,zmmrm512|b64 [rvm:fv: evex.nds.512.f3.0f38.w1 27 /r ] AVX512CD,FUTURE + ;# Systematic names for the hinting nop instructions ; These should be last in the file diff --git a/insns.h b/insns.h index 19b27ae..3959a05 100644 --- a/insns.h +++ b/insns.h @@ -128,6 +128,7 @@ extern const uint8_t nasm_bytecodes[]; #define IF_TBM 0x1300000000UL /* HACK NEED TO REORGANIZE THESE BITS */ #define IF_RTM 0x1400000000UL /* HACK NEED TO REORGANIZE THESE BITS */ #define IF_INVPCID 0x1500000000UL /* HACK NEED TO REORGANIZE THESE BITS */ +#define IF_AVX512CD 0x1600000000UL /* AVX-512 Conflict Detection insns */ #define IF_INSMASK 0xFF00000000UL /* the mask for instruction set types */ #define IF_PMASK 0xFF000000UL /* the mask for processor types */ #define IF_PLEVEL 0x0F000000UL /* the mask for processor instr. level */ diff --git a/test/avx512cd.asm b/test/avx512cd.asm new file mode 100644 index 0000000..670a6fc --- /dev/null +++ b/test/avx512cd.asm @@ -0,0 +1,105 @@ +; AVX-512CD testcases from gas +;------------------------ +; +; This file is taken from there +; https://gnu.googlesource.com/binutils/+/master/gas/testsuite/gas/i386/x86-64-avx512cd-intel.d +; So the original author is "H.J. Lu" +; +; Jin Kyu Song converted it for the nasm testing suite using gas2nasm.py + +%macro testcase 2 + %ifdef BIN + db %1 + %endif + %ifdef SRC + %2 + %endif +%endmacro + + +bits 64 + +testcase { 0x62, 0x02, 0x7d, 0x48, 0xc4, 0xf5 }, { vpconflictd zmm30,zmm29 } +testcase { 0x62, 0x02, 0x7d, 0x4f, 0xc4, 0xf5 }, { vpconflictd zmm30\{k7\},zmm29 } +testcase { 0x62, 0x02, 0x7d, 0xcf, 0xc4, 0xf5 }, { vpconflictd zmm30\{k7\}\{z\},zmm29 } +testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0x31 }, { vpconflictd zmm30,ZWORD [rcx] } +testcase { 0x62, 0x22, 0x7d, 0x48, 0xc4, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vpconflictd zmm30,ZWORD [rax+r14*8+0x123] } +testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0x31 }, { vpconflictd zmm30,DWORD [rcx]\{1to16\} } +testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0x72, 0x7f }, { vpconflictd zmm30,ZWORD [rdx+0x1fc0] } +testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0xb2, 0x00, 0x20, 0x00, 0x00 }, { vpconflictd zmm30,ZWORD [rdx+0x2000] } +testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0x72, 0x80 }, { vpconflictd zmm30,ZWORD [rdx-0x2000] } +testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0xb2, 0xc0, 0xdf, 0xff, 0xff }, { vpconflictd zmm30,ZWORD [rdx-0x2040] } +testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0x72, 0x7f }, { vpconflictd zmm30,DWORD [rdx+0x1fc]\{1to16\} } +testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0xb2, 0x00, 0x02, 0x00, 0x00 }, { vpconflictd zmm30,DWORD [rdx+0x200]\{1to16\} } +testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0x72, 0x80 }, { vpconflictd zmm30,DWORD [rdx-0x200]\{1to16\} } +testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0xb2, 0xfc, 0xfd, 0xff, 0xff }, { vpconflictd zmm30,DWORD [rdx-0x204]\{1to16\} } +testcase { 0x62, 0x02, 0xfd, 0x48, 0xc4, 0xf5 }, { vpconflictq zmm30,zmm29 } +testcase { 0x62, 0x02, 0xfd, 0x4f, 0xc4, 0xf5 }, { vpconflictq zmm30\{k7\},zmm29 } +testcase { 0x62, 0x02, 0xfd, 0xcf, 0xc4, 0xf5 }, { vpconflictq zmm30\{k7\}\{z\},zmm29 } +testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0x31 }, { vpconflictq zmm30,ZWORD [rcx] } +testcase { 0x62, 0x22, 0xfd, 0x48, 0xc4, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vpconflictq zmm30,ZWORD [rax+r14*8+0x123] } +testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0x31 }, { vpconflictq zmm30,QWORD [rcx]\{1to8\} } +testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0x72, 0x7f }, { vpconflictq zmm30,ZWORD [rdx+0x1fc0] } +testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0xb2, 0x00, 0x20, 0x00, 0x00 }, { vpconflictq zmm30,ZWORD [rdx+0x2000] } +testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0x72, 0x80 }, { vpconflictq zmm30,ZWORD [rdx-0x2000] } +testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0xb2, 0xc0, 0xdf, 0xff, 0xff }, { vpconflictq zmm30,ZWORD [rdx-0x2040] } +testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0x72, 0x7f }, { vpconflictq zmm30,QWORD [rdx+0x3f8]\{1to8\} } +testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0xb2, 0x00, 0x04, 0x00, 0x00 }, { vpconflictq zmm30,QWORD [rdx+0x400]\{1to8\} } +testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0x72, 0x80 }, { vpconflictq zmm30,QWORD [rdx-0x400]\{1to8\} } +testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0xb2, 0xf8, 0xfb, 0xff, 0xff }, { vpconflictq zmm30,QWORD [rdx-0x408]\{1to8\} } +testcase { 0x62, 0x02, 0x7d, 0x48, 0x44, 0xf5 }, { vplzcntd zmm30,zmm29 } +testcase { 0x62, 0x02, 0x7d, 0x4f, 0x44, 0xf5 }, { vplzcntd zmm30\{k7\},zmm29 } +testcase { 0x62, 0x02, 0x7d, 0xcf, 0x44, 0xf5 }, { vplzcntd zmm30\{k7\}\{z\},zmm29 } +testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0x31 }, { vplzcntd zmm30,ZWORD [rcx] } +testcase { 0x62, 0x22, 0x7d, 0x48, 0x44, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vplzcntd zmm30,ZWORD [rax+r14*8+0x123] } +testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0x31 }, { vplzcntd zmm30,DWORD [rcx]\{1to16\} } +testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0x72, 0x7f }, { vplzcntd zmm30,ZWORD [rdx+0x1fc0] } +testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0xb2, 0x00, 0x20, 0x00, 0x00 }, { vplzcntd zmm30,ZWORD [rdx+0x2000] } +testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0x72, 0x80 }, { vplzcntd zmm30,ZWORD [rdx-0x2000] } +testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0xb2, 0xc0, 0xdf, 0xff, 0xff }, { vplzcntd zmm30,ZWORD [rdx-0x2040] } +testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0x72, 0x7f }, { vplzcntd zmm30,DWORD [rdx+0x1fc]\{1to16\} } +testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0xb2, 0x00, 0x02, 0x00, 0x00 }, { vplzcntd zmm30,DWORD [rdx+0x200]\{1to16\} } +testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0x72, 0x80 }, { vplzcntd zmm30,DWORD [rdx-0x200]\{1to16\} } +testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0xb2, 0xfc, 0xfd, 0xff, 0xff }, { vplzcntd zmm30,DWORD [rdx-0x204]\{1to16\} } +testcase { 0x62, 0x02, 0xfd, 0x48, 0x44, 0xf5 }, { vplzcntq zmm30,zmm29 } +testcase { 0x62, 0x02, 0xfd, 0x4f, 0x44, 0xf5 }, { vplzcntq zmm30\{k7\},zmm29 } +testcase { 0x62, 0x02, 0xfd, 0xcf, 0x44, 0xf5 }, { vplzcntq zmm30\{k7\}\{z\},zmm29 } +testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0x31 }, { vplzcntq zmm30,ZWORD [rcx] } +testcase { 0x62, 0x22, 0xfd, 0x48, 0x44, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vplzcntq zmm30,ZWORD [rax+r14*8+0x123] } +testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0x31 }, { vplzcntq zmm30,QWORD [rcx]\{1to8\} } +testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0x72, 0x7f }, { vplzcntq zmm30,ZWORD [rdx+0x1fc0] } +testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0xb2, 0x00, 0x20, 0x00, 0x00 }, { vplzcntq zmm30,ZWORD [rdx+0x2000] } +testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0x72, 0x80 }, { vplzcntq zmm30,ZWORD [rdx-0x2000] } +testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0xb2, 0xc0, 0xdf, 0xff, 0xff }, { vplzcntq zmm30,ZWORD [rdx-0x2040] } +testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0x72, 0x7f }, { vplzcntq zmm30,QWORD [rdx+0x3f8]\{1to8\} } +testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0xb2, 0x00, 0x04, 0x00, 0x00 }, { vplzcntq zmm30,QWORD [rdx+0x400]\{1to8\} } +testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0x72, 0x80 }, { vplzcntq zmm30,QWORD [rdx-0x400]\{1to8\} } +testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0xb2, 0xf8, 0xfb, 0xff, 0xff }, { vplzcntq zmm30,QWORD [rdx-0x408]\{1to8\} } +testcase { 0x62, 0x92, 0x16, 0x40, 0x27, 0xec }, { vptestnmd k5,zmm29,zmm28 } +testcase { 0x62, 0x92, 0x16, 0x47, 0x27, 0xec }, { vptestnmd k5\{k7\},zmm29,zmm28 } +testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0x29 }, { vptestnmd k5,zmm29,ZWORD [rcx] } +testcase { 0x62, 0xb2, 0x16, 0x40, 0x27, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vptestnmd k5,zmm29,ZWORD [rax+r14*8+0x123] } +testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0x29 }, { vptestnmd k5,zmm29,DWORD [rcx]\{1to16\} } +testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0x6a, 0x7f }, { vptestnmd k5,zmm29,ZWORD [rdx+0x1fc0] } +testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0xaa, 0x00, 0x20, 0x00, 0x00 }, { vptestnmd k5,zmm29,ZWORD [rdx+0x2000] } +testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0x6a, 0x80 }, { vptestnmd k5,zmm29,ZWORD [rdx-0x2000] } +testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0xaa, 0xc0, 0xdf, 0xff, 0xff }, { vptestnmd k5,zmm29,ZWORD [rdx-0x2040] } +testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0x6a, 0x7f }, { vptestnmd k5,zmm29,DWORD [rdx+0x1fc]\{1to16\} } +testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0xaa, 0x00, 0x02, 0x00, 0x00 }, { vptestnmd k5,zmm29,DWORD [rdx+0x200]\{1to16\} } +testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0x6a, 0x80 }, { vptestnmd k5,zmm29,DWORD [rdx-0x200]\{1to16\} } +testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0xaa, 0xfc, 0xfd, 0xff, 0xff }, { vptestnmd k5,zmm29,DWORD [rdx-0x204]\{1to16\} } +testcase { 0x62, 0x92, 0x96, 0x40, 0x27, 0xec }, { vptestnmq k5,zmm29,zmm28 } +testcase { 0x62, 0x92, 0x96, 0x47, 0x27, 0xec }, { vptestnmq k5\{k7\},zmm29,zmm28 } +testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0x29 }, { vptestnmq k5,zmm29,ZWORD [rcx] } +testcase { 0x62, 0xb2, 0x96, 0x40, 0x27, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vptestnmq k5,zmm29,ZWORD [rax+r14*8+0x123] } +testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0x29 }, { vptestnmq k5,zmm29,QWORD [rcx]\{1to8\} } +testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0x6a, 0x7f }, { vptestnmq k5,zmm29,ZWORD [rdx+0x1fc0] } +testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0xaa, 0x00, 0x20, 0x00, 0x00 }, { vptestnmq k5,zmm29,ZWORD [rdx+0x2000] } +testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0x6a, 0x80 }, { vptestnmq k5,zmm29,ZWORD [rdx-0x2000] } +testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0xaa, 0xc0, 0xdf, 0xff, 0xff }, { vptestnmq k5,zmm29,ZWORD [rdx-0x2040] } +testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0x6a, 0x7f }, { vptestnmq k5,zmm29,QWORD [rdx+0x3f8]\{1to8\} } +testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0xaa, 0x00, 0x04, 0x00, 0x00 }, { vptestnmq k5,zmm29,QWORD [rdx+0x400]\{1to8\} } +testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0x6a, 0x80 }, { vptestnmq k5,zmm29,QWORD [rdx-0x400]\{1to8\} } +testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0xaa, 0xf8, 0xfb, 0xff, 0xff }, { vptestnmq k5,zmm29,QWORD [rdx-0x408]\{1to8\} } +testcase { 0x62, 0x62, 0x7e, 0x48, 0x3a, 0xf6 }, { vpbroadcastmw2d zmm30,k6 } +testcase { 0x62, 0x62, 0xfe, 0x48, 0x2a, 0xf6 }, { vpbroadcastmb2q zmm30,k6 } -- 2.7.4