AVX-512: Add AVX-512CD instructions
author: Jin Kyu Song <jin.kyu.song@intel.com>
Fri, 13 Sep 2013 21:12:55 +0000 (14:12 -0700)
committer: Cyrill Gorcunov <gorcunov@gmail.com>
Fri, 13 Sep 2013 21:27:02 +0000 (01:27 +0400)
Added Conflict Detection (AVX-512CD) instructions.
These instructions are supported
if CPUID.(EAX=07H, ECX=0):EBX.AVX512CD[bit 28] = 1.

Signed-off-by: Jin Kyu Song <jin.kyu.song@intel.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
insns.dat
insns.h
test/avx512cd.asm [new file with mode: 0644]

diff --git a/insns.dat b/insns.dat
index ad72d61..3c59da2 100644 (file)
--- a/insns.dat
+++ b/insns.dat
@@ -4054,6 +4054,16 @@ KUNPCKBW  kreg,kreg,kreg  [rvm:    vex.nds.l1.66.0f.w0 4b /r ]  AVX512,FUTURE
 KXNORW    kreg,kreg,kreg  [rvm:       vex.nds.l1.0f.w0 46 /r ]  AVX512,FUTURE
 KXORW     kreg,kreg,kreg  [rvm:       vex.nds.l1.0f.w0 47 /r ]  AVX512,FUTURE
 
+; AVX-512CD (Conflict Detection) instructions; VPTESTNM{D,Q} are grouped here but are AVX-512F, hence flagged AVX512
+VPBROADCASTMB2Q  zmmreg,kreg                     [rm:             evex.512.f3.0f38.w1 2a /r ]  AVX512CD,FUTURE
+VPBROADCASTMW2D  zmmreg,kreg                     [rm:             evex.512.f3.0f38.w0 3a /r ]  AVX512CD,FUTURE
+VPCONFLICTD      zmmreg|mask|z,zmmrm512|b32      [rm:fv:          evex.512.66.0f38.w0 c4 /r ]  AVX512CD,FUTURE
+VPCONFLICTQ      zmmreg|mask|z,zmmrm512|b64      [rm:fv:          evex.512.66.0f38.w1 c4 /r ]  AVX512CD,FUTURE
+VPLZCNTD         zmmreg|mask|z,zmmrm512|b32      [rm:fv:          evex.512.66.0f38.w0 44 /r ]  AVX512CD,FUTURE
+VPLZCNTQ         zmmreg|mask|z,zmmrm512|b64      [rm:fv:          evex.512.66.0f38.w1 44 /r ]  AVX512CD,FUTURE
+VPTESTNMD        kreg|mask,zmmreg,zmmrm512|b32   [rvm:fv:     evex.nds.512.f3.0f38.w0 27 /r ]  AVX512,FUTURE
+VPTESTNMQ        kreg|mask,zmmreg,zmmrm512|b64   [rvm:fv:     evex.nds.512.f3.0f38.w1 27 /r ]  AVX512,FUTURE
+
 
 ;# Systematic names for the hinting nop instructions
 ; These should be last in the file
diff --git a/insns.h b/insns.h
index 19b27ae..3959a05 100644 (file)
--- a/insns.h
+++ b/insns.h
@@ -128,6 +128,7 @@ extern const uint8_t nasm_bytecodes[];
 #define IF_TBM          0x1300000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
 #define IF_RTM          0x1400000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
 #define IF_INVPCID      0x1500000000UL    /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_AVX512CD     0x1600000000UL    /* AVX-512 Conflict Detection insns (CPUID.(EAX=07H,ECX=0):EBX bit 28) */
 #define IF_INSMASK      0xFF00000000UL    /* the mask for instruction set types */
 #define IF_PMASK        0xFF000000UL    /* the mask for processor types */
 #define IF_PLEVEL       0x0F000000UL    /* the mask for processor instr. level */
diff --git a/test/avx512cd.asm b/test/avx512cd.asm
new file mode 100644 (file)
index 0000000..670a6fc
--- /dev/null
@@ -0,0 +1,105 @@
+; AVX-512CD testcases from gas
+;------------------------
+;
+; This file is derived from the following binutils test:
+;     https://gnu.googlesource.com/binutils/+/master/gas/testsuite/gas/i386/x86-64-avx512cd-intel.d
+; The original author is "H.J. Lu" <hongjiu dot lu at intel dot com>
+;
+; Jin Kyu Song converted it for the nasm testing suite using gas2nasm.py
+
+; testcase <bytes>, <insn>: emits <bytes> via db when BIN is defined, and <insn> itself when SRC is defined
+%macro testcase 2
+ %ifdef BIN
+  db %1
+ %endif
+ %ifdef SRC
+  %2
+ %endif
+%endmacro
+
+bits 64
+; Each row pairs a byte sequence with an instruction; e.g. disp 0x1fc0 appears as one byte (0x7f), 0x2000 as four
+testcase       { 0x62, 0x02, 0x7d, 0x48, 0xc4, 0xf5                                     }, { vpconflictd zmm30,zmm29                                      }
+testcase       { 0x62, 0x02, 0x7d, 0x4f, 0xc4, 0xf5                                     }, { vpconflictd zmm30\{k7\},zmm29                                }
+testcase       { 0x62, 0x02, 0x7d, 0xcf, 0xc4, 0xf5                                     }, { vpconflictd zmm30\{k7\}\{z\},zmm29                           }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0x31                                     }, { vpconflictd zmm30,ZWORD [rcx]                                }
+testcase       { 0x62, 0x22, 0x7d, 0x48, 0xc4, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00       }, { vpconflictd zmm30,ZWORD [rax+r14*8+0x123]                    }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0x31                                     }, { vpconflictd zmm30,DWORD [rcx]\{1to16\}                       }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0x72, 0x7f                               }, { vpconflictd zmm30,ZWORD [rdx+0x1fc0]                         }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0xb2, 0x00, 0x20, 0x00, 0x00             }, { vpconflictd zmm30,ZWORD [rdx+0x2000]                         }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0x72, 0x80                               }, { vpconflictd zmm30,ZWORD [rdx-0x2000]                         }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0xb2, 0xc0, 0xdf, 0xff, 0xff             }, { vpconflictd zmm30,ZWORD [rdx-0x2040]                         }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0x72, 0x7f                               }, { vpconflictd zmm30,DWORD [rdx+0x1fc]\{1to16\}                 }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0xb2, 0x00, 0x02, 0x00, 0x00             }, { vpconflictd zmm30,DWORD [rdx+0x200]\{1to16\}                 }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0x72, 0x80                               }, { vpconflictd zmm30,DWORD [rdx-0x200]\{1to16\}                 }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0xb2, 0xfc, 0xfd, 0xff, 0xff             }, { vpconflictd zmm30,DWORD [rdx-0x204]\{1to16\}                 }
+testcase       { 0x62, 0x02, 0xfd, 0x48, 0xc4, 0xf5                                     }, { vpconflictq zmm30,zmm29                                      }
+testcase       { 0x62, 0x02, 0xfd, 0x4f, 0xc4, 0xf5                                     }, { vpconflictq zmm30\{k7\},zmm29                                }
+testcase       { 0x62, 0x02, 0xfd, 0xcf, 0xc4, 0xf5                                     }, { vpconflictq zmm30\{k7\}\{z\},zmm29                           }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0x31                                     }, { vpconflictq zmm30,ZWORD [rcx]                                }
+testcase       { 0x62, 0x22, 0xfd, 0x48, 0xc4, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00       }, { vpconflictq zmm30,ZWORD [rax+r14*8+0x123]                    }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0x31                                     }, { vpconflictq zmm30,QWORD [rcx]\{1to8\}                        }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0x72, 0x7f                               }, { vpconflictq zmm30,ZWORD [rdx+0x1fc0]                         }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0xb2, 0x00, 0x20, 0x00, 0x00             }, { vpconflictq zmm30,ZWORD [rdx+0x2000]                         }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0x72, 0x80                               }, { vpconflictq zmm30,ZWORD [rdx-0x2000]                         }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0xb2, 0xc0, 0xdf, 0xff, 0xff             }, { vpconflictq zmm30,ZWORD [rdx-0x2040]                         }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0x72, 0x7f                               }, { vpconflictq zmm30,QWORD [rdx+0x3f8]\{1to8\}                  }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0xb2, 0x00, 0x04, 0x00, 0x00             }, { vpconflictq zmm30,QWORD [rdx+0x400]\{1to8\}                  }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0x72, 0x80                               }, { vpconflictq zmm30,QWORD [rdx-0x400]\{1to8\}                  }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0xb2, 0xf8, 0xfb, 0xff, 0xff             }, { vpconflictq zmm30,QWORD [rdx-0x408]\{1to8\}                  }
+testcase       { 0x62, 0x02, 0x7d, 0x48, 0x44, 0xf5                                     }, { vplzcntd zmm30,zmm29                                         }
+testcase       { 0x62, 0x02, 0x7d, 0x4f, 0x44, 0xf5                                     }, { vplzcntd zmm30\{k7\},zmm29                                   }
+testcase       { 0x62, 0x02, 0x7d, 0xcf, 0x44, 0xf5                                     }, { vplzcntd zmm30\{k7\}\{z\},zmm29                              }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0x44, 0x31                                     }, { vplzcntd zmm30,ZWORD [rcx]                                   }
+testcase       { 0x62, 0x22, 0x7d, 0x48, 0x44, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00       }, { vplzcntd zmm30,ZWORD [rax+r14*8+0x123]                       }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0x44, 0x31                                     }, { vplzcntd zmm30,DWORD [rcx]\{1to16\}                          }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0x44, 0x72, 0x7f                               }, { vplzcntd zmm30,ZWORD [rdx+0x1fc0]                            }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0x44, 0xb2, 0x00, 0x20, 0x00, 0x00             }, { vplzcntd zmm30,ZWORD [rdx+0x2000]                            }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0x44, 0x72, 0x80                               }, { vplzcntd zmm30,ZWORD [rdx-0x2000]                            }
+testcase       { 0x62, 0x62, 0x7d, 0x48, 0x44, 0xb2, 0xc0, 0xdf, 0xff, 0xff             }, { vplzcntd zmm30,ZWORD [rdx-0x2040]                            }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0x44, 0x72, 0x7f                               }, { vplzcntd zmm30,DWORD [rdx+0x1fc]\{1to16\}                    }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0x44, 0xb2, 0x00, 0x02, 0x00, 0x00             }, { vplzcntd zmm30,DWORD [rdx+0x200]\{1to16\}                    }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0x44, 0x72, 0x80                               }, { vplzcntd zmm30,DWORD [rdx-0x200]\{1to16\}                    }
+testcase       { 0x62, 0x62, 0x7d, 0x58, 0x44, 0xb2, 0xfc, 0xfd, 0xff, 0xff             }, { vplzcntd zmm30,DWORD [rdx-0x204]\{1to16\}                    }
+testcase       { 0x62, 0x02, 0xfd, 0x48, 0x44, 0xf5                                     }, { vplzcntq zmm30,zmm29                                         }
+testcase       { 0x62, 0x02, 0xfd, 0x4f, 0x44, 0xf5                                     }, { vplzcntq zmm30\{k7\},zmm29                                   }
+testcase       { 0x62, 0x02, 0xfd, 0xcf, 0x44, 0xf5                                     }, { vplzcntq zmm30\{k7\}\{z\},zmm29                              }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0x44, 0x31                                     }, { vplzcntq zmm30,ZWORD [rcx]                                   }
+testcase       { 0x62, 0x22, 0xfd, 0x48, 0x44, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00       }, { vplzcntq zmm30,ZWORD [rax+r14*8+0x123]                       }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0x44, 0x31                                     }, { vplzcntq zmm30,QWORD [rcx]\{1to8\}                           }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0x44, 0x72, 0x7f                               }, { vplzcntq zmm30,ZWORD [rdx+0x1fc0]                            }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0x44, 0xb2, 0x00, 0x20, 0x00, 0x00             }, { vplzcntq zmm30,ZWORD [rdx+0x2000]                            }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0x44, 0x72, 0x80                               }, { vplzcntq zmm30,ZWORD [rdx-0x2000]                            }
+testcase       { 0x62, 0x62, 0xfd, 0x48, 0x44, 0xb2, 0xc0, 0xdf, 0xff, 0xff             }, { vplzcntq zmm30,ZWORD [rdx-0x2040]                            }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0x44, 0x72, 0x7f                               }, { vplzcntq zmm30,QWORD [rdx+0x3f8]\{1to8\}                     }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0x44, 0xb2, 0x00, 0x04, 0x00, 0x00             }, { vplzcntq zmm30,QWORD [rdx+0x400]\{1to8\}                     }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0x44, 0x72, 0x80                               }, { vplzcntq zmm30,QWORD [rdx-0x400]\{1to8\}                     }
+testcase       { 0x62, 0x62, 0xfd, 0x58, 0x44, 0xb2, 0xf8, 0xfb, 0xff, 0xff             }, { vplzcntq zmm30,QWORD [rdx-0x408]\{1to8\}                     }
+testcase       { 0x62, 0x92, 0x16, 0x40, 0x27, 0xec                                     }, { vptestnmd k5,zmm29,zmm28                                     }
+testcase       { 0x62, 0x92, 0x16, 0x47, 0x27, 0xec                                     }, { vptestnmd k5\{k7\},zmm29,zmm28                               }
+testcase       { 0x62, 0xf2, 0x16, 0x40, 0x27, 0x29                                     }, { vptestnmd k5,zmm29,ZWORD [rcx]                               }
+testcase       { 0x62, 0xb2, 0x16, 0x40, 0x27, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00       }, { vptestnmd k5,zmm29,ZWORD [rax+r14*8+0x123]                   }
+testcase       { 0x62, 0xf2, 0x16, 0x50, 0x27, 0x29                                     }, { vptestnmd k5,zmm29,DWORD [rcx]\{1to16\}                      }
+testcase       { 0x62, 0xf2, 0x16, 0x40, 0x27, 0x6a, 0x7f                               }, { vptestnmd k5,zmm29,ZWORD [rdx+0x1fc0]                        }
+testcase       { 0x62, 0xf2, 0x16, 0x40, 0x27, 0xaa, 0x00, 0x20, 0x00, 0x00             }, { vptestnmd k5,zmm29,ZWORD [rdx+0x2000]                        }
+testcase       { 0x62, 0xf2, 0x16, 0x40, 0x27, 0x6a, 0x80                               }, { vptestnmd k5,zmm29,ZWORD [rdx-0x2000]                        }
+testcase       { 0x62, 0xf2, 0x16, 0x40, 0x27, 0xaa, 0xc0, 0xdf, 0xff, 0xff             }, { vptestnmd k5,zmm29,ZWORD [rdx-0x2040]                        }
+testcase       { 0x62, 0xf2, 0x16, 0x50, 0x27, 0x6a, 0x7f                               }, { vptestnmd k5,zmm29,DWORD [rdx+0x1fc]\{1to16\}                }
+testcase       { 0x62, 0xf2, 0x16, 0x50, 0x27, 0xaa, 0x00, 0x02, 0x00, 0x00             }, { vptestnmd k5,zmm29,DWORD [rdx+0x200]\{1to16\}                }
+testcase       { 0x62, 0xf2, 0x16, 0x50, 0x27, 0x6a, 0x80                               }, { vptestnmd k5,zmm29,DWORD [rdx-0x200]\{1to16\}                }
+testcase       { 0x62, 0xf2, 0x16, 0x50, 0x27, 0xaa, 0xfc, 0xfd, 0xff, 0xff             }, { vptestnmd k5,zmm29,DWORD [rdx-0x204]\{1to16\}                }
+testcase       { 0x62, 0x92, 0x96, 0x40, 0x27, 0xec                                     }, { vptestnmq k5,zmm29,zmm28                                     }
+testcase       { 0x62, 0x92, 0x96, 0x47, 0x27, 0xec                                     }, { vptestnmq k5\{k7\},zmm29,zmm28                               }
+testcase       { 0x62, 0xf2, 0x96, 0x40, 0x27, 0x29                                     }, { vptestnmq k5,zmm29,ZWORD [rcx]                               }
+testcase       { 0x62, 0xb2, 0x96, 0x40, 0x27, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00       }, { vptestnmq k5,zmm29,ZWORD [rax+r14*8+0x123]                   }
+testcase       { 0x62, 0xf2, 0x96, 0x50, 0x27, 0x29                                     }, { vptestnmq k5,zmm29,QWORD [rcx]\{1to8\}                       }
+testcase       { 0x62, 0xf2, 0x96, 0x40, 0x27, 0x6a, 0x7f                               }, { vptestnmq k5,zmm29,ZWORD [rdx+0x1fc0]                        }
+testcase       { 0x62, 0xf2, 0x96, 0x40, 0x27, 0xaa, 0x00, 0x20, 0x00, 0x00             }, { vptestnmq k5,zmm29,ZWORD [rdx+0x2000]                        }
+testcase       { 0x62, 0xf2, 0x96, 0x40, 0x27, 0x6a, 0x80                               }, { vptestnmq k5,zmm29,ZWORD [rdx-0x2000]                        }
+testcase       { 0x62, 0xf2, 0x96, 0x40, 0x27, 0xaa, 0xc0, 0xdf, 0xff, 0xff             }, { vptestnmq k5,zmm29,ZWORD [rdx-0x2040]                        }
+testcase       { 0x62, 0xf2, 0x96, 0x50, 0x27, 0x6a, 0x7f                               }, { vptestnmq k5,zmm29,QWORD [rdx+0x3f8]\{1to8\}                 }
+testcase       { 0x62, 0xf2, 0x96, 0x50, 0x27, 0xaa, 0x00, 0x04, 0x00, 0x00             }, { vptestnmq k5,zmm29,QWORD [rdx+0x400]\{1to8\}                 }
+testcase       { 0x62, 0xf2, 0x96, 0x50, 0x27, 0x6a, 0x80                               }, { vptestnmq k5,zmm29,QWORD [rdx-0x400]\{1to8\}                 }
+testcase       { 0x62, 0xf2, 0x96, 0x50, 0x27, 0xaa, 0xf8, 0xfb, 0xff, 0xff             }, { vptestnmq k5,zmm29,QWORD [rdx-0x408]\{1to8\}                 }
+testcase       { 0x62, 0x62, 0x7e, 0x48, 0x3a, 0xf6                                     }, { vpbroadcastmw2d zmm30,k6                                     }
+testcase       { 0x62, 0x62, 0xfe, 0x48, 0x2a, 0xf6                                     }, { vpbroadcastmb2q zmm30,k6                                     }