Remove regbnd and vec_disp8

[platform/upstream/binutils.git] / gas / config / tc-i386.c
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c

index 15eed14..147acdf 100644 (file)
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1,6 +1,7 @@
  /* tc-i386.c -- Assemble code for the Intel 80386
     Copyright 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
+   2012, 2013, 2014
     Free Software Foundation, Inc.
  
     This file is part of GAS, the GNU Assembler.
@@ -59,12 +60,14 @@
     WAIT_PREFIX must be the first prefix since FWAIT is really is an
     instruction, and so must come before any prefixes.
     The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
-   REP_PREFIX, LOCK_PREFIX.  */
+   REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
  #define WAIT_PREFIX    0
  #define SEG_PREFIX     1
  #define ADDR_PREFIX    2
  #define DATA_PREFIX    3
  #define REP_PREFIX     4
+#define HLE_PREFIX     REP_PREFIX
+#define BND_PREFIX     REP_PREFIX
  #define LOCK_PREFIX    5
  #define REX_PREFIX     6       /* must come last.  */
  #define MAX_PREFIXES   7       /* max prefixes per opcode */
@@ -83,6 +86,7 @@
  #define QWORD_MNEM_SUFFIX  'q'
  #define XMMWORD_MNEM_SUFFIX  'x'
  #define YMMWORD_MNEM_SUFFIX 'y'
+#define ZMMWORD_MNEM_SUFFIX 'z'
  /* Intel Syntax.  Use a non-ascii letter since since it never appears
     in instructions.  */
  #define LONG_DOUBLE_MNEM_SUFFIX '\1'
@@ -142,7 +146,7 @@ static void set_16bit_gcc_code_flag (int);
  static void set_intel_syntax (int);
  static void set_intel_mnemonic (int);
  static void set_allow_index_reg (int);
-static void set_sse_check (int);
+static void set_check (int);
  static void set_cpu_arch (int);
  #ifdef TE_PE
  static void pe_directive_secrel (int);
@@ -186,11 +190,53 @@ static void handle_large_common (int small ATTRIBUTE_UNUSED);
  
  static const char *default_arch = DEFAULT_ARCH;
  
+/* This struct describes rounding control and SAE in the instruction.  */
+struct RC_Operation
+{
+  enum rc_type
+    {
+      rne = 0,
+      rd,
+      ru,
+      rz,
+      saeonly
+    } type;
+  int operand;
+};
+
+static struct RC_Operation rc_op;
+
+/* The struct describes masking, applied to OPERAND in the instruction.
+   MASK is a pointer to the corresponding mask register.  ZEROING tells
+   whether merging or zeroing mask is used.  */
+struct Mask_Operation
+{
+  const reg_entry *mask;
+  unsigned int zeroing;
+  /* The operand where this operation is associated.  */
+  int operand;
+};
+
+static struct Mask_Operation mask_op;
+
+/* The struct describes broadcasting, applied to OPERAND.  TYPE
+   selects the broadcast factor.  */
+struct Broadcast_Operation
+{
+  /* Type of broadcast: no broadcast, {1to8}, or {1to16}.  */
+  int type;
+
+  /* Index of broadcasted operand.  */
+  int operand;
+};
+
+static struct Broadcast_Operation broadcast_op;
+
  /* VEX prefix.  */
  typedef struct
  {
-  /* VEX prefix is either 2 byte or 3 byte.  */
-  unsigned char bytes[3];
+  /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
+  unsigned char bytes[4];
    unsigned int length;
    /* Destination or source register specifier.  */
    const reg_entry *register_specifier;
@@ -217,7 +263,20 @@ enum i386_error
      old_gcc_only,
      unsupported_with_intel_mnemonic,
      unsupported_syntax,
-    unsupported
+    unsupported,
+    invalid_vsib_address,
+    invalid_vector_register_set,
+    unsupported_vector_index_register,
+    unsupported_broadcast,
+    broadcast_not_on_src_operand,
+    broadcast_needed,
+    unsupported_masking,
+    mask_not_on_destination,
+    no_default_mask,
+    unsupported_rc_sae,
+    rc_sae_operand_not_last_imm,
+    invalid_register_operand,
+    try_vector_disp8
    };
  
  struct _i386_insn
@@ -271,14 +330,44 @@ struct _i386_insn
         addressing modes of this insn are encoded.  */
      modrm_byte rm;
      rex_byte rex;
+    rex_byte vrex;
      sib_byte sib;
      vex_prefix vex;
  
+    /* Masking attributes.  */
+    struct Mask_Operation *mask;
+
+    /* Rounding control and SAE attributes.  */
+    struct RC_Operation *rounding;
+
+    /* Broadcasting attributes.  */
+    struct Broadcast_Operation *broadcast;
+
+    /* Compressed disp8*N attribute.  */
+    unsigned int memshift;
+
      /* Swap operand in encoding.  */
      unsigned int swap_operand;
  
-    /* Force 32bit displacement in encoding.  */
-    unsigned int disp32_encoding;
+    /* Prefer 8bit or 32bit displacement in encoding.  */
+    enum
+      {
+       disp_encoding_default = 0,
+       disp_encoding_8bit,
+       disp_encoding_32bit
+      } disp_encoding;
+
+    /* REP prefix.  */
+    const char *rep_prefix;
+
+    /* HLE prefix.  */
+    const char *hle_prefix;
+
+    /* Have BND prefix.  */
+    const char *bnd_prefix;
+
+    /* Need VREX to support upper 16 registers.  */
+    int need_vrex;
  
      /* Error message.  */
      enum i386_error error;
@@ -286,9 +375,27 @@ struct _i386_insn
  
  typedef struct _i386_insn i386_insn;
  
+/* Link RC type with corresponding string, that'll be looked for in
+   asm.  */
+struct RC_name
+{
+  enum rc_type type;
+  const char *name;
+  unsigned int len;
+};
+
+static const struct RC_name RC_NamesTable[] =
+{
+  {  rne, STRING_COMMA_LEN ("rn-sae") },
+  {  rd,  STRING_COMMA_LEN ("rd-sae") },
+  {  ru,  STRING_COMMA_LEN ("ru-sae") },
+  {  rz,  STRING_COMMA_LEN ("rz-sae") },
+  {  saeonly,  STRING_COMMA_LEN ("sae") },
+};
+
  /* List of chars besides those in app.c:symbol_chars that can start an
     operand.  Used to prevent the scrubber eating vital white-space.  */
-const char extra_symbol_chars[] = "*%-(["
+const char extra_symbol_chars[] = "*%-([{"
  #ifdef LEX_AT
         "@"
  #endif
@@ -301,8 +408,10 @@ const char extra_symbol_chars[] = "*%-(["
       || ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))        \
          && !defined (TE_GNU)                           \
          && !defined (TE_LINUX)                         \
-        && !defined (TE_NETWARE)                       \
+        && !defined (TE_NACL)                          \
+        && !defined (TE_NETWARE)                       \
          && !defined (TE_FreeBSD)                       \
+        && !defined (TE_DragonFly)                     \
          && !defined (TE_NetBSD)))
  /* This array holds the chars that always start a comment.  If the
     pre-processor is disabled, these aren't very useful.  The option
@@ -389,7 +498,7 @@ enum flag_code {
  
  static enum flag_code flag_code;
  static unsigned int object_64bit;
-static unsigned int disallow_64bit_disp;
+static unsigned int disallow_64bit_reloc;
  static int use_rela_relocations = 0;
  
  #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
@@ -407,14 +516,6 @@ enum x86_elf_abi
  static enum x86_elf_abi x86_elf_abi = I386_ABI;
  #endif
  
-/* The names used to print error messages.  */
-static const char *flag_code_names[] =
-  {
-    "32",
-    "16",
-    "64"
-  };
-
  /* 1 for intel syntax,
     0 if att syntax.  */
  static int intel_syntax = 0;
@@ -432,16 +533,21 @@ static int allow_pseudo_reg = 0;
  /* 1 if register prefix % not required.  */
  static int allow_naked_reg = 0;
  
+/* 1 if the assembler should add BND prefix for all control-transferring
+   instructions supporting it, even if this prefix wasn't specified
+   explicitly.  */
+static int add_bnd_prefix = 0;
+
  /* 1 if pseudo index register, eiz/riz, is allowed .  */
  static int allow_index_reg = 0;
  
-static enum
+static enum check_kind
    {
-    sse_check_none = 0,
-    sse_check_warning,
-    sse_check_error
+    check_none = 0,
+    check_warning,
+    check_error
    }
-sse_check;
+sse_check, operand_check = check_warning;
  
  /* Register prefix used for error message.  */
  static const char *register_prefix = "%";
@@ -493,6 +599,21 @@ static enum
      vex256
    } avxscalar;
  
+/* Encode scalar EVEX LIG instructions with specific vector length.  */
+static enum
+  {
+    evexl128 = 0,
+    evexl256,
+    evexl512
+  } evexlig;
+
+/* Encode EVEX WIG instructions with specific evex.w.  */
+static enum
+  {
+    evexw0 = 0,
+    evexw1
+  } evexwig;
+
  /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
  static symbolS *GOT_symbol;
  
@@ -629,6 +750,8 @@ static const arch_entry cpu_arch[] =
      CPU_COREI7_FLAGS, 0, 0 },
    { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
      CPU_L1OM_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("k1om"), PROCESSOR_K1OM,
+    CPU_K1OM_FLAGS, 0, 0 },
    { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
      CPU_K6_FLAGS, 0, 0 },
    { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
@@ -643,8 +766,18 @@ static const arch_entry cpu_arch[] =
      CPU_K8_FLAGS, 0, 0 },
    { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
      CPU_AMDFAM10_FLAGS, 0, 0 },
-  { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BDVER1,
+  { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
      CPU_BDVER1_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
+    CPU_BDVER2_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
+    CPU_BDVER3_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
+    CPU_BDVER4_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
+    CPU_BTVER1_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
+    CPU_BTVER2_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
      CPU_8087_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
@@ -675,10 +808,22 @@ static const arch_entry cpu_arch[] =
      CPU_ANY_SSE_FLAGS, 0, 1 },
    { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
      CPU_AVX_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
+    CPU_AVX2_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
+    CPU_AVX512F_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
+    CPU_AVX512CD_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
+    CPU_AVX512ER_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
+    CPU_AVX512PF_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".noavx"), PROCESSOR_UNKNOWN,
      CPU_ANY_AVX_FLAGS, 0, 1 },
    { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
      CPU_VMX_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
+    CPU_VMFUNC_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
      CPU_SMX_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
@@ -697,6 +842,8 @@ static const arch_entry cpu_arch[] =
      CPU_RDRND_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
      CPU_F16C_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
+    CPU_BMI2_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
      CPU_FMA_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
@@ -707,8 +854,18 @@ static const arch_entry cpu_arch[] =
      CPU_LWP_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
      CPU_MOVBE_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
+    CPU_CX16_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
      CPU_EPT_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
+    CPU_LZCNT_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
+    CPU_HLE_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
+    CPU_RTM_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
+    CPU_INVPCID_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
      CPU_CLFLUSH_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
@@ -733,6 +890,20 @@ static const arch_entry cpu_arch[] =
      CPU_ABM_FLAGS, 0, 0 },
    { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
      CPU_BMI_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
+    CPU_TBM_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
+    CPU_ADX_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
+    CPU_RDSEED_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
+    CPU_PRFCHW_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
+    CPU_SMAP_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
+    CPU_MPX_FLAGS, 0, 0 },
+  { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
+    CPU_SHA_FLAGS, 0, 0 },
  };
  
  #ifdef I386COFF
@@ -807,7 +978,8 @@ const pseudo_typeS md_pseudo_table[] =
    {"att_mnemonic", set_intel_mnemonic, 0},
    {"allow_index_reg", set_allow_index_reg, 1},
    {"disallow_index_reg", set_allow_index_reg, 0},
-  {"sse_check", set_sse_check, 0},
+  {"sse_check", set_check, 0},
+  {"operand_check", set_check, 1},
  #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    {"largecomm", handle_large_common, 0},
  #else
@@ -1018,7 +1190,7 @@ i386_align_code (fragS *fragP, int count)
       PROCESSOR_CORE, PROCESSOR_CORE2, PROCESSOR_COREI7, and
       PROCESSOR_GENERIC64, alt_long_patt will be used.
       3. For PROCESSOR_ATHLON, PROCESSOR_K6, PROCESSOR_K8 and
-     PROCESSOR_AMDFAM10, and PROCESSOR_BDVER1, alt_short_patt
+     PROCESSOR_AMDFAM10, PROCESSOR_BD and PROCESSOR_BT, alt_short_patt
       will be used.
  
       When -mtune= isn't used, alt_long_patt will be used if
@@ -1058,13 +1230,13 @@ i386_align_code (fragS *fragP, int count)
               else
                 patt = f32_patt;
               break;
-           case PROCESSOR_PENTIUMPRO:
             case PROCESSOR_PENTIUM4:
             case PROCESSOR_NOCONA:
             case PROCESSOR_CORE:
             case PROCESSOR_CORE2:
             case PROCESSOR_COREI7:
             case PROCESSOR_L1OM:
+           case PROCESSOR_K1OM:
             case PROCESSOR_GENERIC64:
               patt = alt_long_patt;
               break;
@@ -1072,12 +1244,14 @@ i386_align_code (fragS *fragP, int count)
             case PROCESSOR_ATHLON:
             case PROCESSOR_K8:
             case PROCESSOR_AMDFAM10:
-           case PROCESSOR_BDVER1:
+           case PROCESSOR_BD:
+           case PROCESSOR_BT:
               patt = alt_short_patt;
               break;
             case PROCESSOR_I386:
             case PROCESSOR_I486:
             case PROCESSOR_PENTIUM:
+           case PROCESSOR_PENTIUMPRO:
             case PROCESSOR_GENERIC32:
               patt = f32_patt;
               break;
@@ -1100,7 +1274,8 @@ i386_align_code (fragS *fragP, int count)
             case PROCESSOR_ATHLON:
             case PROCESSOR_K8:
             case PROCESSOR_AMDFAM10:
-           case PROCESSOR_BDVER1:
+           case PROCESSOR_BD:
+           case PROCESSOR_BT:
             case PROCESSOR_GENERIC32:
               /* We use cpu_arch_isa_flags to check if we CAN optimize
                  with nops.  */
@@ -1116,6 +1291,7 @@ i386_align_code (fragS *fragP, int count)
             case PROCESSOR_CORE2:
             case PROCESSOR_COREI7:
             case PROCESSOR_L1OM:
+           case PROCESSOR_K1OM:
               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
                 patt = alt_long_patt;
               else
@@ -1479,6 +1655,8 @@ static const i386_operand_type anydisp
    = OPERAND_TYPE_ANYDISP;
  static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
  static const i386_operand_type regymm = OPERAND_TYPE_REGYMM;
+static const i386_operand_type regzmm = OPERAND_TYPE_REGZMM;
+static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
  static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
  static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
  static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
@@ -1571,7 +1749,9 @@ match_mem_size (const insn_template *t, unsigned int j)
                || (i.types[j].bitfield.xmmword
                    && !t->operand_types[j].bitfield.xmmword)
                || (i.types[j].bitfield.ymmword
-                  && !t->operand_types[j].bitfield.ymmword)));
+                  && !t->operand_types[j].bitfield.ymmword)
+              || (i.types[j].bitfield.zmmword
+                  && !t->operand_types[j].bitfield.zmmword)));
  }
  
  /* Return 1 if there is no size conflict on any operands for
@@ -1652,6 +1832,7 @@ operand_type_match (i386_operand_type overlap,
    temp.bitfield.tbyte = 0;
    temp.bitfield.xmmword = 0;
    temp.bitfield.ymmword = 0;
+  temp.bitfield.zmmword = 0;
    if (operand_type_all_zero (&temp))
      goto mismatch;
  
@@ -1716,9 +1897,20 @@ operand_type_register_match (i386_operand_type m0,
  }
  
  static INLINE unsigned int
+register_number (const reg_entry *r)
+{
+  unsigned int nr = r->reg_num;
+
+  if (r->reg_flags & RegRex)
+    nr += 8;
+
+  return nr;
+}
+
+static INLINE unsigned int
  mode_from_disp_size (i386_operand_type t)
  {
-  if (t.bitfield.disp8)
+  if (t.bitfield.disp8 || t.bitfield.vec_disp8)
      return 1;
    else if (t.bitfield.disp16
            || t.bitfield.disp32
@@ -1774,6 +1966,25 @@ fits_in_unsigned_long (offsetT num ATTRIBUTE_UNUSED)
  }                              /* fits_in_unsigned_long() */
  
  static INLINE int
+fits_in_vec_disp8 (offsetT num)
+{
+  int shift = i.memshift;
+  unsigned int mask;
+
+  if (shift == -1)
+    abort ();
+
+  mask = (1 << shift) - 1;
+
+  /* Return 0 if NUM isn't properly aligned.  */
+  if ((num & mask))
+    return 0;
+
+  /* Check if NUM will fit in 8bit after shift.  */
+  return fits_in_signed_byte (num >> shift);
+}
+
+static INLINE int
  fits_in_imm4 (offsetT num)
  {
    return (num & 0xf) == num;
@@ -2058,8 +2269,22 @@ set_allow_index_reg (int flag)
  }
  
  static void
-set_sse_check (int dummy ATTRIBUTE_UNUSED)
+set_check (int what)
  {
+  enum check_kind *kind;
+  const char *str;
+
+  if (what)
+    {
+      kind = &operand_check;
+      str = "operand";
+    }
+  else
+    {
+      kind = &sse_check;
+      str = "sse";
+    }
+
    SKIP_WHITESPACE ();
  
    if (!is_end_of_line[(unsigned char) *input_line_pointer])
@@ -2068,17 +2293,17 @@ set_sse_check (int dummy ATTRIBUTE_UNUSED)
        int e = get_symbol_end ();
  
        if (strcmp (string, "none") == 0)
-       sse_check = sse_check_none;
+       *kind = check_none;
        else if (strcmp (string, "warning") == 0)
-       sse_check = sse_check_warning;
+       *kind = check_warning;
        else if (strcmp (string, "error") == 0)
-       sse_check = sse_check_error;
+       *kind = check_error;
        else
-       as_bad (_("bad argument to sse_check directive."));
+       as_bad (_("bad argument to %s_check directive."), str);
        *input_line_pointer = e;
      }
    else
-    as_bad (_("missing argument for sse_check directive"));
+    as_bad (_("missing argument for %s_check directive"), str);
  
    demand_empty_rest_of_line ();
  }
@@ -2108,6 +2333,11 @@ check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
        || new_flag.bitfield.cpul1om)
      return;
  
+  /* If we are targeting Intel K1OM, we must enable it.  */
+  if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_K1OM
+      || new_flag.bitfield.cpuk1om)
+    return;
+
    as_bad (_("`%s' is not supported on `%s'"), name, arch);
  #endif
  }
@@ -2174,6 +2404,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
                   else
                     cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
                   cpu_arch_flags = flags;
+                 cpu_arch_isa_flags = flags;
                 }
               *input_line_pointer = e;
               demand_empty_rest_of_line ();
@@ -2218,12 +2449,19 @@ i386_arch (void)
         as_fatal (_("Intel L1OM is 64bit ELF only"));
        return bfd_arch_l1om;
      }
+  else if (cpu_arch_isa == PROCESSOR_K1OM)
+    {
+      if (OUTPUT_FLAVOR != bfd_target_elf_flavour
+         || flag_code != CODE_64BIT)
+       as_fatal (_("Intel K1OM is 64bit ELF only"));
+      return bfd_arch_k1om;
+    }
    else
      return bfd_arch_i386;
  }
  
  unsigned long
-i386_mach ()
+i386_mach (void)
  {
    if (!strncmp (default_arch, "x86_64", 6))
      {
@@ -2234,6 +2472,13 @@ i386_mach ()
             as_fatal (_("Intel L1OM is 64bit ELF only"));
           return bfd_mach_l1om;
         }
+      else if (cpu_arch_isa == PROCESSOR_K1OM)
+       {
+         if (OUTPUT_FLAVOR != bfd_target_elf_flavour
+             || default_arch[6] != '\0')
+           as_fatal (_("Intel K1OM is 64bit ELF only"));
+         return bfd_mach_k1om;
+       }
        else if (default_arch[6] == '\0')
         return bfd_mach_x86_64;
        else
@@ -2242,11 +2487,11 @@ i386_mach ()
    else if (!strcmp (default_arch, "i386"))
      return bfd_mach_i386_i386;
    else
-    as_fatal (_("Unknown architecture"));
+    as_fatal (_("unknown architecture"));
  }
  \f
  void
-md_begin ()
+md_begin (void)
  {
    const char *hash_err;
  
@@ -2276,7 +2521,7 @@ md_begin ()
                                     (void *) core_optab);
             if (hash_err)
               {
-               as_fatal (_("Internal Error:  Can't hash %s: %s"),
+               as_fatal (_("can't hash %s: %s"),
                           (optab - 1)->name,
                           hash_err);
               }
@@ -2298,7 +2543,7 @@ md_begin ()
        {
         hash_err = hash_insert (reg_hash, regtab->reg_name, (void *) regtab);
         if (hash_err)
-         as_fatal (_("Internal Error:  Can't hash %s: %s"),
+         as_fatal (_("can't hash %s: %s"),
                     regtab->reg_name,
                     hash_err);
        }
@@ -2330,6 +2575,8 @@ md_begin ()
             register_chars[c] = mnemonic_chars[c];
             operand_chars[c] = c;
           }
+       else if (c == '{' || c == '}')
+         operand_chars[c] = c;
  
         if (ISALPHA (c) || ISDIGIT (c))
           identifier_chars[c] = c;
@@ -2358,18 +2605,14 @@ md_begin ()
        operand_chars[(unsigned char) *p] = *p;
    }
  
-#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
-  if (IS_ELF)
-    {
-      record_alignment (text_section, 2);
-      record_alignment (data_section, 2);
-      record_alignment (bss_section, 2);
-    }
-#endif
-
    if (flag_code == CODE_64BIT)
      {
+#if defined (OBJ_COFF) && defined (TE_PE)
+      x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
+                                 ? 32 : 16);
+#else
        x86_dwarf2_return_column = 16;
+#endif
        x86_cie_data_alignment = -8;
      }
    else
@@ -2426,6 +2669,7 @@ pi (char *line, i386_insn *x)
           || x->types[j].bitfield.regmmx
           || x->types[j].bitfield.regxmm
           || x->types[j].bitfield.regymm
+         || x->types[j].bitfield.regzmm
           || x->types[j].bitfield.sreg2
           || x->types[j].bitfield.sreg3
           || x->types[j].bitfield.control
@@ -2513,6 +2757,7 @@ const type_names[] =
    { OPERAND_TYPE_DISP32, "d32" },
    { OPERAND_TYPE_DISP32S, "d32s" },
    { OPERAND_TYPE_DISP64, "d64" },
+  { OPERAND_TYPE_VEC_DISP8, "Vector d8" },
    { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
    { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
    { OPERAND_TYPE_CONTROL, "control reg" },
@@ -2527,6 +2772,8 @@ const type_names[] =
    { OPERAND_TYPE_REGMMX, "rMMX" },
    { OPERAND_TYPE_REGXMM, "rXMM" },
    { OPERAND_TYPE_REGYMM, "rYMM" },
+  { OPERAND_TYPE_REGZMM, "rZMM" },
+  { OPERAND_TYPE_REGMASK, "Mask reg" },
    { OPERAND_TYPE_ESSEG, "es" },
  };
  
@@ -2551,6 +2798,7 @@ static bfd_reloc_code_real_type
  reloc (unsigned int size,
         int pcrel,
         int sign,
+       int bnd_prefix,
         bfd_reloc_code_real_type other)
  {
    if (other != NO_RELOC)
@@ -2582,8 +2830,18 @@ reloc (unsigned int size,
             break;
           }
  
+#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
+      if (other == BFD_RELOC_SIZE32)
+       {
+         if (size == 8)
+           return BFD_RELOC_SIZE64;
+         if (pcrel)
+           as_bad (_("there are no pc-relative size relocations"));
+       }
+#endif
+
        /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
-      if (size == 4 && flag_code != CODE_64BIT)
+      if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
         sign = -1;
  
        rel = bfd_reloc_type_lookup (stdoutput, other);
@@ -2613,7 +2871,9 @@ reloc (unsigned int size,
         {
         case 1: return BFD_RELOC_8_PCREL;
         case 2: return BFD_RELOC_16_PCREL;
-       case 4: return BFD_RELOC_32_PCREL;
+       case 4: return (bnd_prefix && object_64bit
+                       ? BFD_RELOC_X86_64_PC32_BND
+                       : BFD_RELOC_32_PCREL);
         case 8: return BFD_RELOC_64_PCREL;
         }
        as_bad (_("cannot do %u byte pc-relative relocation"), size);
@@ -2665,8 +2925,11 @@ tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
        && fixP->fx_r_type == BFD_RELOC_32_PCREL)
      return 0;
  
-  /* adjust_reloc_syms doesn't know about the GOT.  */
-  if (fixP->fx_r_type == BFD_RELOC_386_GOTOFF
+  /* adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
+     for size relocations.  */
+  if (fixP->fx_r_type == BFD_RELOC_SIZE32
+      || fixP->fx_r_type == BFD_RELOC_SIZE64
+      || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
        || fixP->fx_r_type == BFD_RELOC_386_PLT32
        || fixP->fx_r_type == BFD_RELOC_386_GOT32
        || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
@@ -2764,10 +3027,9 @@ build_vex_prefix (const insn_template *t)
    /* Check register specifier.  */
    if (i.vex.register_specifier)
      {
-      register_specifier = i.vex.register_specifier->reg_num;
-      if ((i.vex.register_specifier->reg_flags & RegRex))
-       register_specifier += 8;
-      register_specifier = ~register_specifier & 0xf;
+      register_specifier =
+       ~register_number (i.vex.register_specifier) & 0xf;
+      gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
      }
    else
      register_specifier = 0xf;
@@ -2902,21 +3164,191 @@ build_vex_prefix (const insn_template *t)
      }
  }
  
+/* Build the EVEX prefix.  */
+
+static void
+build_evex_prefix (void)
+{
+  unsigned int register_specifier;
+  unsigned int implied_prefix;
+  unsigned int m, w;
+  rex_byte vrex_used = 0;
+
+  /* Check register specifier.  */
+  if (i.vex.register_specifier)
+    {
+      gas_assert ((i.vrex & REX_X) == 0);
+
+      register_specifier = i.vex.register_specifier->reg_num;
+      if ((i.vex.register_specifier->reg_flags & RegRex))
+       register_specifier += 8;
+      /* The upper 16 registers are encoded in the fourth byte of the
+        EVEX prefix.  */
+      if (!(i.vex.register_specifier->reg_flags & RegVRex))
+       i.vex.bytes[3] = 0x8;
+      register_specifier = ~register_specifier & 0xf;
+    }
+  else
+    {
+      register_specifier = 0xf;
+
+      /* Encode upper 16 vector index register in the fourth byte of
+        the EVEX prefix.  */
+      if (!(i.vrex & REX_X))
+       i.vex.bytes[3] = 0x8;
+      else
+       vrex_used |= REX_X;
+    }
+
+  switch ((i.tm.base_opcode >> 8) & 0xff)
+    {
+    case 0:
+      implied_prefix = 0;
+      break;
+    case DATA_PREFIX_OPCODE:
+      implied_prefix = 1;
+      break;
+    case REPE_PREFIX_OPCODE:
+      implied_prefix = 2;
+      break;
+    case REPNE_PREFIX_OPCODE:
+      implied_prefix = 3;
+      break;
+    default:
+      abort ();
+    }
+
+  /* 4 byte EVEX prefix.  */
+  i.vex.length = 4;
+  i.vex.bytes[0] = 0x62;
+
+  /* mmmm bits.  */
+  switch (i.tm.opcode_modifier.vexopcode)
+    {
+    case VEX0F:
+      m = 1;
+      break;
+    case VEX0F38:
+      m = 2;
+      break;
+    case VEX0F3A:
+      m = 3;
+      break;
+    default:
+      abort ();
+      break;
+    }
+
+  /* The high 3 bits of the second EVEX byte are 1's complement of RXB
+     bits from REX.  */
+  i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
+
+  /* The fifth bit of the second EVEX byte is 1's complement of the
+     REX_R bit in VREX.  */
+  if (!(i.vrex & REX_R))
+    i.vex.bytes[1] |= 0x10;
+  else
+    vrex_used |= REX_R;
+
+  if ((i.reg_operands + i.imm_operands) == i.operands)
+    {
+      /* When all operands are registers, the REX_X bit in REX is not
+        used.  We reuse it to encode the upper 16 registers, which is
+        indicated by the REX_B bit in VREX.  The REX_X bit is encoded
+        as 1's complement.  */
+      if ((i.vrex & REX_B))
+       {
+         vrex_used |= REX_B;
+         i.vex.bytes[1] &= ~0x40;
+       }
+    }
+
+  /* EVEX instructions shouldn't need the REX prefix.  */
+  i.vrex &= ~vrex_used;
+  gas_assert (i.vrex == 0);
+
+  /* Check the REX.W bit.  */
+  w = (i.rex & REX_W) ? 1 : 0;
+  if (i.tm.opcode_modifier.vexw)
+    {
+      if (i.tm.opcode_modifier.vexw == VEXW1)
+       w = 1;
+    }
+  /* If w is not set it means we are dealing with a WIG instruction.  */
+  else if (!w)
+    {
+      if (evexwig == evexw1)
+        w = 1;
+    }
+
+  /* Encode the U bit.  */
+  implied_prefix |= 0x4;
+
+  /* The third byte of the EVEX prefix.  */
+  i.vex.bytes[2] = (w << 7 | register_specifier << 3 | implied_prefix);
+
+  /* The fourth byte of the EVEX prefix.  */
+  /* The zeroing-masking bit.  */
+  if (i.mask && i.mask->zeroing)
+    i.vex.bytes[3] |= 0x80;
+
+  /* Without RC, set the broadcast bit only when broadcast is used.  */
+  if (!i.rounding)
+    {
+      /* Encode the vector length.  */
+      unsigned int vec_length;
+
+      switch (i.tm.opcode_modifier.evex)
+       {
+       case EVEXLIG: /* LL' is ignored */
+         vec_length = evexlig << 5;
+         break;
+       case EVEX128:
+         vec_length = 0 << 5;
+         break;
+       case EVEX256:
+         vec_length = 1 << 5;
+         break;
+       case EVEX512:
+         vec_length = 2 << 5;
+         break;
+       default:
+         abort ();
+         break;
+       }
+      i.vex.bytes[3] |= vec_length;
+      /* Encode the broadcast bit.  */
+      if (i.broadcast)
+       i.vex.bytes[3] |= 0x10;
+    }
+  else
+    {
+      if (i.rounding->type != saeonly)
+       i.vex.bytes[3] |= 0x10 | (i.rounding->type << 5);
+      else
+       i.vex.bytes[3] |= 0x10;
+    }
+
+  if (i.mask && i.mask->mask)
+    i.vex.bytes[3] |= i.mask->mask->reg_num;
+}
+
  static void
  process_immext (void)
  {
    expressionS *exp;
  
-  if (i.tm.cpu_flags.bitfield.cpusse3 && i.operands > 0)
+  if ((i.tm.cpu_flags.bitfield.cpusse3 || i.tm.cpu_flags.bitfield.cpusvme)
+      && i.operands > 0)
      {
-      /* SSE3 Instructions have the fixed operands with an opcode
-        suffix which is coded in the same place as an 8-bit immediate
-        field would be.  Here we check those operands and remove them
-        afterwards.  */
+      /* MONITOR/MWAIT as well as SVME instructions have fixed operands
+        with an opcode suffix which is coded in the same place as an
+        8-bit immediate field would be.
+        Here we check those operands and remove them afterwards.  */
        unsigned int x;
  
        for (x = 0; x < i.operands; x++)
-       if (i.op[x].regs->reg_num != x)
+       if (register_number (i.op[x].regs) != x)
           as_bad (_("can't use register '%s%s' as operand %d in '%s'."),
                   register_prefix, i.op[x].regs->reg_name, x + 1,
                   i.tm.name);
@@ -2932,9 +3364,10 @@ process_immext (void)
       AVX instructions also use this encoding, for some of
       3 argument instructions.  */
  
-  gas_assert (i.imm_operands == 0
+  gas_assert (i.imm_operands <= 1
               && (i.operands <= 2
-                 || (i.tm.opcode_modifier.vex
+                 || ((i.tm.opcode_modifier.vex
+                      || i.tm.opcode_modifier.evex)
                       && i.operands <= 4)));
  
    exp = &im_expressions[i.imm_operands++];
@@ -2946,6 +3379,43 @@ process_immext (void)
    i.tm.extension_opcode = None;
  }
  
+
+/* Validate the HLE prefix named by i.hle_prefix against the matched
+   template i.tm.  Return 1 when the combination is acceptable;
+   otherwise report the problem with as_bad and return 0.  */
+
+static int
+check_hle (void)
+{
+  switch (i.tm.opcode_modifier.hleprefixok)
+    {
+    case HLEPrefixAny:
+      /* No further condition is imposed by the template.  */
+      return 1;
+
+    case HLEPrefixNone:
+      as_bad (_("invalid instruction `%s' after `%s'"),
+             i.tm.name, i.hle_prefix);
+      return 0;
+
+    case HLEPrefixLock:
+      /* The HLE prefix is accepted only together with `lock'.  */
+      if (!i.prefix[LOCK_PREFIX])
+       {
+         as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
+         return 0;
+       }
+      return 1;
+
+    case HLEPrefixRelease:
+      /* Only `xrelease' is accepted here, and the last operand has to
+        be a memory operand.  */
+      if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
+       {
+         as_bad (_("instruction `%s' after `xacquire' not allowed"),
+                 i.tm.name);
+         return 0;
+       }
+      if (i.mem_operands != 0
+         && operand_type_check (i.types[i.operands - 1], anymem))
+       return 1;
+      as_bad (_("memory destination needed for instruction `%s'"
+               " after `xrelease'"), i.tm.name);
+      return 0;
+
+    default:
+      /* Unknown hleprefixok value in the template.  */
+      abort ();
+    }
+}
+
  /* This is the guts of the machine-dependent assembler.  LINE points to a
     machine dependent instruction.  This function is supposed to emit
     the frags/bytes it assembles to.  */
@@ -3006,21 +3476,10 @@ md_assemble (char *line)
    /* Don't optimize displacement for movabs since it only takes 64bit
       displacement.  */
    if (i.disp_operands
-      && !i.disp32_encoding)
-    {
-      if (flag_code == CODE_64BIT)
-       {
-         if (strcmp (mnemonic, "movabs") == 0)
-           {
-             if (disallow_64bit_disp)
-               as_bad (_("'movabs' isn't supported in x32 mode"));
-           }
-         else
-           optimize_disp ();
-       }
-      else
-       optimize_disp ();
-    }
+      && i.disp_encoding != disp_encoding_32bit
+      && (flag_code != CODE_64BIT
+         || strcmp (mnemonic, "movabs") != 0))
+    optimize_disp ();
  
    /* Next, we find a template that matches the given insn,
       making sure the overlap of the given operands types is consistent
@@ -3029,7 +3488,7 @@ md_assemble (char *line)
    if (!(t = match_template ()))
      return;
  
-  if (sse_check != sse_check_none
+  if (sse_check != check_none
        && !i.tm.opcode_modifier.noavx
        && (i.tm.cpu_flags.bitfield.cpusse
           || i.tm.cpu_flags.bitfield.cpusse2
@@ -3038,7 +3497,7 @@ md_assemble (char *line)
           || i.tm.cpu_flags.bitfield.cpusse4_1
           || i.tm.cpu_flags.bitfield.cpusse4_2))
      {
-      (sse_check == sse_check_warning
+      (sse_check == check_warning
         ? as_warn
         : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
      }
@@ -3063,6 +3522,14 @@ md_assemble (char *line)
      if (!add_prefix (FWAIT_OPCODE))
        return;
  
+  /* Check if REP prefix is OK.  */
+  if (i.rep_prefix && !i.tm.opcode_modifier.repprefixok)
+    {
+      as_bad (_("invalid instruction `%s' after `%s'"),
+               i.tm.name, i.rep_prefix);
+      return;
+    }
+
    /* Check for lock without a lockable instruction.  Destination operand
       must be memory unless it is xchg (0x86).  */
    if (i.prefix[LOCK_PREFIX]
@@ -3075,6 +3542,25 @@ md_assemble (char *line)
        return;
      }
  
+  /* Check if HLE prefix is OK.  */
+  if (i.hle_prefix && !check_hle ())
+    return;
+
+  /* Check BND prefix.  */
+  if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
+    as_bad (_("expecting valid branch instruction after `bnd'"));
+
+  if (i.tm.cpu_flags.bitfield.cpumpx
+      && flag_code == CODE_64BIT
+      && i.prefix[ADDR_PREFIX])
+    as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
+
+  /* Insert BND prefix.  */
+  if (add_bnd_prefix
+      && i.tm.opcode_modifier.bndprefixok
+      && !i.prefix[BND_PREFIX])
+    add_prefix (BND_PREFIX_OPCODE);
+
    /* Check string instruction segment overrides.  */
    if (i.tm.opcode_modifier.isstring && i.mem_operands != 0)
      {
@@ -3128,6 +3614,9 @@ md_assemble (char *line)
    if (i.tm.opcode_modifier.vex)
      build_vex_prefix (t);
  
+  if (i.tm.opcode_modifier.evex)
+    build_evex_prefix ();
+
    /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
       instructions may define INT_OPCODE as well, so avoid this corner
       case for those instructions that use MODRM.  */
@@ -3207,9 +3696,6 @@ parse_insn (char *line, char *mnemonic)
    const insn_template *t;
    char *dot_p = NULL;
  
-  /* Non-zero if we found a prefix only acceptable with string insns.  */
-  const char *expecting_string_instruction = NULL;
-
    while (1)
      {
        mnem_p = mnemonic;
@@ -3278,7 +3764,12 @@ parse_insn (char *line, char *mnemonic)
             case PREFIX_EXIST:
               return NULL;
             case PREFIX_REP:
-             expecting_string_instruction = current_templates->start->name;
+             if (current_templates->start->cpu_flags.bitfield.cpuhle)
+               i.hle_prefix = current_templates->start->name;
+             else if (current_templates->start->cpu_flags.bitfield.cpumpx)
+               i.bnd_prefix = current_templates->start->name;
+             else
+               i.rep_prefix = current_templates->start->name;
               break;
             default:
               break;
@@ -3296,11 +3787,15 @@ parse_insn (char *line, char *mnemonic)
          encoding.  */
        if (mnem_p - 2 == dot_p && dot_p[1] == 's')
         i.swap_operand = 1;
-      else if (mnem_p - 4 == dot_p 
+      else if (mnem_p - 3 == dot_p
+              && dot_p[1] == 'd'
+              && dot_p[2] == '8')
+       i.disp_encoding = disp_encoding_8bit;
+      else if (mnem_p - 4 == dot_p
                && dot_p[1] == 'd'
                && dot_p[2] == '3'
                && dot_p[3] == '2')
-       i.disp32_encoding = 1;
+       i.disp_encoding = disp_encoding_32bit;
        else
         goto check_suffix;
        mnem_p = dot_p;
@@ -3423,27 +3918,6 @@ skip:
        as_warn (_("use .code16 to ensure correct addressing mode"));
      }
  
-  /* Check for rep/repne without a string instruction.  */
-  if (expecting_string_instruction)
-    {
-      static templates override;
-
-      for (t = current_templates->start; t < current_templates->end; ++t)
-       if (t->opcode_modifier.isstring)
-         break;
-      if (t >= current_templates->end)
-       {
-         as_bad (_("expecting string instruction after `%s'"),
-                 expecting_string_instruction);
-         return NULL;
-       }
-      for (override.start = t; t < current_templates->end; ++t)
-       if (!t->opcode_modifier.isstring)
-         break;
-      override.end = t;
-      current_templates = &override;
-    }
-
    return l;
  }
  
@@ -3582,6 +4056,28 @@ swap_2_operands (int xchg1, int xchg2)
    temp_reloc = i.reloc[xchg2];
    i.reloc[xchg2] = i.reloc[xchg1];
    i.reloc[xchg1] = temp_reloc;
+
+  if (i.mask)
+    {
+      if (i.mask->operand == xchg1)
+       i.mask->operand = xchg2;
+      else if (i.mask->operand == xchg2)
+       i.mask->operand = xchg1;
+    }
+  if (i.broadcast)
+    {
+      if (i.broadcast->operand == xchg1)
+       i.broadcast->operand = xchg2;
+      else if (i.broadcast->operand == xchg2)
+       i.broadcast->operand = xchg1;
+    }
+  if (i.rounding)
+    {
+      if (i.rounding->operand == xchg1)
+       i.rounding->operand = xchg2;
+      else if (i.rounding->operand == xchg2)
+       i.rounding->operand = xchg1;
+    }
  }
  
  static void
@@ -3832,27 +4328,246 @@ optimize_disp (void)
        }
  }
  
-/* Check if operands are valid for the instruction.  Update VEX
-   operand types.  */
+/* Check if operands are valid for the instruction.  */
  
  static int
-VEX_check_operands (const insn_template *t)
+check_VecOperands (const insn_template *t)
  {
-  if (!t->opcode_modifier.vex)
-    return 0;
+  unsigned int op;
  
-  /* Only check VEX_Imm4, which must be the first operand.  */
-  if (t->operand_types[0].bitfield.vec_imm4)
+  /* Without VSIB byte, we can't have a vector register for index.  */
+  if (!t->opcode_modifier.vecsib
+      && i.index_reg
+      && (i.index_reg->reg_type.bitfield.regxmm
+         || i.index_reg->reg_type.bitfield.regymm
+         || i.index_reg->reg_type.bitfield.regzmm))
      {
-      if (i.op[0].imms->X_op != O_constant
-         || !fits_in_imm4 (i.op[0].imms->X_add_number))
-       {
-         i.error = bad_imm4;
-         return 1;
-       }
+      i.error = unsupported_vector_index_register;
+      return 1;
+    }
  
-      /* Turn off Imm8 so that update_imm won't complain.  */
-      i.types[0] = vec_imm4;
+  /* Check if default mask is allowed.  */
+  if (t->opcode_modifier.nodefmask
+      && (!i.mask || i.mask->mask->reg_num == 0))
+    {
+      i.error = no_default_mask;
+      return 1;
+    }
+
+  /* For VSIB byte, we need a vector register for index, and all vector
+     registers must be distinct.  */
+  if (t->opcode_modifier.vecsib)
+    {
+      if (!i.index_reg
+         || !((t->opcode_modifier.vecsib == VecSIB128
+               && i.index_reg->reg_type.bitfield.regxmm)
+              || (t->opcode_modifier.vecsib == VecSIB256
+                  && i.index_reg->reg_type.bitfield.regymm)
+              || (t->opcode_modifier.vecsib == VecSIB512
+                  && i.index_reg->reg_type.bitfield.regzmm)))
+      {
+       i.error = invalid_vsib_address;
+       return 1;
+      }
+
+      gas_assert (i.reg_operands == 2 || i.mask);
+      if (i.reg_operands == 2 && !i.mask)
+       {
+         gas_assert (i.types[0].bitfield.regxmm
+                     || i.types[0].bitfield.regymm
+                     || i.types[0].bitfield.regzmm);
+         gas_assert (i.types[2].bitfield.regxmm
+                     || i.types[2].bitfield.regymm
+                     || i.types[2].bitfield.regzmm);
+         if (operand_check == check_none)
+           return 0;
+         if (register_number (i.op[0].regs)
+             != register_number (i.index_reg)
+             && register_number (i.op[2].regs)
+                != register_number (i.index_reg)
+             && register_number (i.op[0].regs)
+                != register_number (i.op[2].regs))
+           return 0;
+         if (operand_check == check_error)
+           {
+             i.error = invalid_vector_register_set;
+             return 1;
+           }
+         as_warn (_("mask, index, and destination registers should be distinct"));
+       }
+    }
+
+  /* Check if broadcast is supported by the instruction and is applied
+     to the memory operand.  */
+  if (i.broadcast)
+    {
+      int broadcasted_opnd_size;
+
+      /* Check if specified broadcast is supported in this instruction,
+        and it's applied to memory operand of DWORD or QWORD type,
+        depending on VecESize.  */
+      if (i.broadcast->type != t->opcode_modifier.broadcast
+         || !i.types[i.broadcast->operand].bitfield.mem
+         || (t->opcode_modifier.vecesize == 0
+             && !i.types[i.broadcast->operand].bitfield.dword
+             && !i.types[i.broadcast->operand].bitfield.unspecified)
+         || (t->opcode_modifier.vecesize == 1
+             && !i.types[i.broadcast->operand].bitfield.qword
+             && !i.types[i.broadcast->operand].bitfield.unspecified))
+       goto bad_broadcast;
+
+      broadcasted_opnd_size = t->opcode_modifier.vecesize ? 64 : 32;
+      if (i.broadcast->type == BROADCAST_1TO16)
+       broadcasted_opnd_size <<= 4; /* Broadcast 1to16.  */
+      else if (i.broadcast->type == BROADCAST_1TO8)
+       broadcasted_opnd_size <<= 3; /* Broadcast 1to8.  */
+      else
+       goto bad_broadcast;
+
+      if ((broadcasted_opnd_size == 256
+          && !t->operand_types[i.broadcast->operand].bitfield.ymmword)
+         || (broadcasted_opnd_size == 512
+             && !t->operand_types[i.broadcast->operand].bitfield.zmmword))
+       {
+       bad_broadcast:
+         i.error = unsupported_broadcast;
+         return 1;
+       }
+    }
+  /* If broadcast is supported in this instruction, we need to check if
+     operand of one-element size isn't specified without broadcast.  */
+  else if (t->opcode_modifier.broadcast && i.mem_operands)
+    {
+      /* Find memory operand.  */
+      for (op = 0; op < i.operands; op++)
+       if (operand_type_check (i.types[op], anymem))
+         break;
+      gas_assert (op < i.operands);
+      /* Check size of the memory operand.  */
+      if ((t->opcode_modifier.vecesize == 0
+          && i.types[op].bitfield.dword)
+         || (t->opcode_modifier.vecesize == 1
+             && i.types[op].bitfield.qword))
+       {
+         i.error = broadcast_needed;
+         return 1;
+       }
+    }
+
+  /* Check if requested masking is supported.  */
+  if (i.mask
+      && (!t->opcode_modifier.masking
+         || (i.mask->zeroing
+             && t->opcode_modifier.masking == MERGING_MASKING)))
+    {
+      i.error = unsupported_masking;
+      return 1;
+    }
+
+  /* Check if masking is applied to dest operand.  */
+  if (i.mask && (i.mask->operand != (int) (i.operands - 1)))
+    {
+      i.error = mask_not_on_destination;
+      return 1;
+    }
+
+  /* Check RC/SAE.  */
+  if (i.rounding)
+    {
+      if ((i.rounding->type != saeonly
+          && !t->opcode_modifier.staticrounding)
+         || (i.rounding->type == saeonly
+             && (t->opcode_modifier.staticrounding
+                 || !t->opcode_modifier.sae)))
+       {
+         i.error = unsupported_rc_sae;
+         return 1;
+       }
+      /* If the instruction has several immediate operands and one of
+        them is rounding, the rounding operand should be the last
+        immediate operand.  */
+      if (i.imm_operands > 1
+         && i.rounding->operand != (int) (i.imm_operands - 1))
+       {
+         i.error = rc_sae_operand_not_last_imm;
+         return 1;
+       }
+    }
+
+  /* Check vector Disp8 operand.  */
+  if (t->opcode_modifier.disp8memshift)
+    {
+      if (i.broadcast)
+       i.memshift = t->opcode_modifier.vecesize ? 3 : 2;
+      else
+       i.memshift = t->opcode_modifier.disp8memshift;
+
+      for (op = 0; op < i.operands; op++)
+       if (operand_type_check (i.types[op], disp)
+           && i.op[op].disps->X_op == O_constant)
+         {
+           offsetT value = i.op[op].disps->X_add_number;
+           int vec_disp8_ok = fits_in_vec_disp8 (value);
+           if (t->operand_types [op].bitfield.vec_disp8)
+             {
+               if (vec_disp8_ok)
+                 i.types[op].bitfield.vec_disp8 = 1;
+               else
+                 {
+                   /* Vector insn can only have Vec_Disp8/Disp32 in
+                      32/64bit modes, and Vec_Disp8/Disp16 in 16bit
+                      mode.  */
+                   i.types[op].bitfield.disp8 = 0;
+                   if (flag_code != CODE_16BIT)
+                     i.types[op].bitfield.disp16 = 0;
+                 }
+             }
+           else if (flag_code != CODE_16BIT)
+             {
+               /* One form of this instruction supports vector Disp8.
+                  Try vector Disp8 if we need to use Disp32.  */
+               if (vec_disp8_ok && !fits_in_signed_byte (value))
+                 {
+                   i.error = try_vector_disp8;
+                   return 1;
+                 }
+             }
+         }
+    }
+  else
+    i.memshift = -1;
+
+  return 0;
+}
+
+/* Check if operands are valid for the instruction.  Update VEX
+   operand types.  */
+
+static int
+VEX_check_operands (const insn_template *t)
+{
+  /* VREX is only valid with EVEX prefix.  */
+  if (i.need_vrex && !t->opcode_modifier.evex)
+    {
+      i.error = invalid_register_operand;
+      return 1;
+    }
+
+  if (!t->opcode_modifier.vex)
+    return 0;
+
+  /* Only check VEX_Imm4, which must be the first operand.  */
+  if (t->operand_types[0].bitfield.vec_imm4)
+    {
+      if (i.op[0].imms->X_op != O_constant
+         || !fits_in_imm4 (i.op[0].imms->X_add_number))
+       {
+         i.error = bad_imm4;
+         return 1;
+       }
+
+      /* Turn off Imm8 so that update_imm won't complain.  */
+      i.types[0] = vec_imm4;
      }
  
    return 0;
@@ -3872,6 +4587,7 @@ match_template (void)
    unsigned int j;
    unsigned int found_cpu_match;
    unsigned int check_register;
+  enum i386_error specific_error = 0;
  
  #if MAX_OPERANDS != 5
  # error "MAX_OPERANDS must be 5."
@@ -3953,10 +4669,12 @@ match_template (void)
               : intel_float_operand (t->name) != 2)
           && ((!operand_types[0].bitfield.regmmx
                && !operand_types[0].bitfield.regxmm
-              && !operand_types[0].bitfield.regymm)
+              && !operand_types[0].bitfield.regymm
+              && !operand_types[0].bitfield.regzmm)
               || (!operand_types[t->operands > 1].bitfield.regmmx
                   && !!operand_types[t->operands > 1].bitfield.regxmm
-                 && !!operand_types[t->operands > 1].bitfield.regymm))
+                 && !!operand_types[t->operands > 1].bitfield.regymm
+                 && !!operand_types[t->operands > 1].bitfield.regzmm))
           && (t->base_opcode != 0x0fc7
               || t->extension_opcode != 1 /* cmpxchg8b */))
         continue;
@@ -4168,9 +4886,12 @@ check_reverse:
           continue;
         }
  
-      /* Check if VEX operands are valid.  */
-      if (VEX_check_operands (t))
-       continue;
+      /* Check if vector and VEX operands are valid.  */
+      if (check_VecOperands (t) || VEX_check_operands (t))
+       {
+         specific_error = i.error;
+         continue;
+       }
  
        /* We've found a match; break out of loop.  */
        break;
@@ -4180,7 +4901,7 @@ check_reverse:
      {
        /* We found no match.  */
        const char *err_msg;
-      switch (i.error)
+      switch (specific_error ? specific_error : i.error)
         {
         default:
           abort ();
@@ -4200,7 +4921,7 @@ check_reverse:
           err_msg = _("invalid instruction suffix");
           break;
         case bad_imm4:
-         err_msg = _("Imm4 isn't the first operand");
+         err_msg = _("constant doesn't fit in 4 bits");
           break;
         case old_gcc_only:
           err_msg = _("only supported with old gcc");
@@ -4212,7 +4933,47 @@ check_reverse:
           err_msg = _("unsupported syntax");
           break;
         case unsupported:
-         err_msg = _("unsupported");
+         as_bad (_("unsupported instruction `%s'"),
+                 current_templates->start->name);
+         return NULL;
+       case invalid_vsib_address:
+         err_msg = _("invalid VSIB address");
+         break;
+       case invalid_vector_register_set:
+         err_msg = _("mask, index, and destination registers must be distinct");
+         break;
+       case unsupported_vector_index_register:
+         err_msg = _("unsupported vector index register");
+         break;
+       case unsupported_broadcast:
+         err_msg = _("unsupported broadcast");
+         break;
+       case broadcast_not_on_src_operand:
+         err_msg = _("broadcast not on source memory operand");
+         break;
+       case broadcast_needed:
+         err_msg = _("broadcast is needed for operand of such type");
+         break;
+       case unsupported_masking:
+         err_msg = _("unsupported masking");
+         break;
+       case mask_not_on_destination:
+         err_msg = _("mask not on destination operand");
+         break;
+       case no_default_mask:
+         err_msg = _("default mask isn't allowed");
+         break;
+       case unsupported_rc_sae:
+         err_msg = _("unsupported static rounding/sae");
+         break;
+       case rc_sae_operand_not_last_imm:
+         if (intel_syntax)
+           err_msg = _("RC/SAE operand must precede immediate operands");
+         else
+           err_msg = _("RC/SAE operand must follow immediate operands");
+         break;
+       case invalid_register_operand:
+         err_msg = _("invalid register operand");
           break;
         }
        as_bad (_("%s for `%s'"), err_msg,
@@ -4406,9 +5167,10 @@ process_suffix (void)
             return 0;
         }
        else if (i.suffix == XMMWORD_MNEM_SUFFIX
-              || i.suffix == YMMWORD_MNEM_SUFFIX)
+              || i.suffix == YMMWORD_MNEM_SUFFIX
+              || i.suffix == ZMMWORD_MNEM_SUFFIX)
         {
-         /* Skip if the instruction has x/y suffix.  match_template
+         /* Skip if the instruction has x/y/z suffix.  match_template
              should check if it is a valid suffix.  */
         }
        else if (intel_syntax && i.tm.opcode_modifier.ignoresize)
@@ -4496,7 +5258,8 @@ process_suffix (void)
    if (i.suffix
        && i.suffix != BYTE_MNEM_SUFFIX
        && i.suffix != XMMWORD_MNEM_SUFFIX
-      && i.suffix != YMMWORD_MNEM_SUFFIX)
+      && i.suffix != YMMWORD_MNEM_SUFFIX
+      && i.suffix != ZMMWORD_MNEM_SUFFIX)
      {
        /* It's not a byte, select word/dword operation.  */
        if (i.tm.opcode_modifier.w)
@@ -4580,6 +5343,10 @@ check_byte_reg (void)
        if (i.types[op].bitfield.reg8)
         continue;
  
+      /* I/O port address operands are OK too.  */
+      if (i.tm.operand_types[op].bitfield.inoutportreg)
+       continue;
+
        /* crc32 doesn't generate this warning.  */
        if (i.tm.base_opcode == 0xf20f38f0)
         continue;
@@ -4587,21 +5354,13 @@ check_byte_reg (void)
        if ((i.types[op].bitfield.reg16
            || i.types[op].bitfield.reg32
            || i.types[op].bitfield.reg64)
-         && i.op[op].regs->reg_num < 4)
+         && i.op[op].regs->reg_num < 4
+         /* Prohibit these changes in 64bit mode, since the lowering
+            would be more complicated.  */
+         && flag_code != CODE_64BIT)
         {
-         /* Prohibit these changes in the 64bit mode, since the
-            lowering is more complicated.  */
-         if (flag_code == CODE_64BIT
-             && !i.tm.operand_types[op].bitfield.inoutportreg)
-           {
-             as_bad (_("Incorrect register `%s%s' used with `%c' suffix"),
-                     register_prefix, i.op[op].regs->reg_name,
-                     i.suffix);
-             return 0;
-           }
  #if REGISTER_WARNINGS
-         if (!quiet_warnings
-             && !i.tm.operand_types[op].bitfield.inoutportreg)
+         if (!quiet_warnings)
             as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
                      register_prefix,
                      (i.op[op].regs + (i.types[op].bitfield.reg16
@@ -4620,6 +5379,7 @@ check_byte_reg (void)
           || i.types[op].bitfield.regmmx
           || i.types[op].bitfield.regxmm
           || i.types[op].bitfield.regymm
+         || i.types[op].bitfield.regzmm
           || i.types[op].bitfield.sreg2
           || i.types[op].bitfield.sreg3
           || i.types[op].bitfield.control
@@ -4659,7 +5419,7 @@ check_long_reg (void)
                 i.suffix);
         return 0;
        }
-  /* Warn if the e prefix on a general reg is missing.  */
+    /* Warn if the e prefix on a general reg is missing.  */
      else if ((!quiet_warnings || flag_code == CODE_64BIT)
              && i.types[op].bitfield.reg16
              && (i.tm.operand_types[op].bitfield.reg32
@@ -4669,22 +5429,19 @@ check_long_reg (void)
            lowering is more complicated.  */
         if (flag_code == CODE_64BIT)
           {
-           as_bad (_("Incorrect register `%s%s' used with `%c' suffix"),
+           as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
                     register_prefix, i.op[op].regs->reg_name,
                     i.suffix);
             return 0;
           }
  #if REGISTER_WARNINGS
-       else
-         as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
-                  register_prefix,
-                  (i.op[op].regs + REGNAM_EAX - REGNAM_AX)->reg_name,
-                  register_prefix,
-                  i.op[op].regs->reg_name,
-                  i.suffix);
+       as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
+                register_prefix,
+                (i.op[op].regs + REGNAM_EAX - REGNAM_AX)->reg_name,
+                register_prefix, i.op[op].regs->reg_name, i.suffix);
  #endif
        }
-  /* Warn if the r prefix on a general reg is missing.  */
+    /* Warn if the r prefix on a general reg is present.  */
      else if (i.types[op].bitfield.reg64
              && (i.tm.operand_types[op].bitfield.reg32
                  || i.tm.operand_types[op].bitfield.acc))
@@ -4698,7 +5455,7 @@ check_long_reg (void)
           }
         else
           {
-           as_bad (_("Incorrect register `%s%s' used with `%c' suffix"),
+           as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
                     register_prefix, i.op[op].regs->reg_name,
                     i.suffix);
             return 0;
@@ -4727,7 +5484,7 @@ check_qword_reg (void)
                 i.suffix);
         return 0;
        }
-  /* Warn if the e prefix on a general reg is missing.  */
+    /* Warn if the r prefix on a general reg is missing.  */
      else if ((i.types[op].bitfield.reg16
               || i.types[op].bitfield.reg32)
              && (i.tm.operand_types[op].bitfield.reg32
@@ -4744,7 +5501,7 @@ check_qword_reg (void)
           }
         else
           {
-           as_bad (_("Incorrect register `%s%s' used with `%c' suffix"),
+           as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
                     register_prefix, i.op[op].regs->reg_name,
                     i.suffix);
             return 0;
@@ -4772,9 +5529,10 @@ check_word_reg (void)
                 i.suffix);
         return 0;
        }
-  /* Warn if the e prefix on a general reg is present.  */
+    /* Warn if the e or r prefix on a general reg is present.  */
      else if ((!quiet_warnings || flag_code == CODE_64BIT)
-            && i.types[op].bitfield.reg32
+            && (i.types[op].bitfield.reg32
+                || i.types[op].bitfield.reg64)
              && (i.tm.operand_types[op].bitfield.reg16
                  || i.tm.operand_types[op].bitfield.acc))
        {
@@ -4782,19 +5540,16 @@ check_word_reg (void)
            lowering is more complicated.  */
         if (flag_code == CODE_64BIT)
           {
-           as_bad (_("Incorrect register `%s%s' used with `%c' suffix"),
+           as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
                     register_prefix, i.op[op].regs->reg_name,
                     i.suffix);
             return 0;
           }
-       else
  #if REGISTER_WARNINGS
-         as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
-                  register_prefix,
-                  (i.op[op].regs + REGNAM_AX - REGNAM_EAX)->reg_name,
-                  register_prefix,
-                  i.op[op].regs->reg_name,
-                  i.suffix);
+       as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
+                register_prefix,
+                (i.op[op].regs + REGNAM_AX - REGNAM_EAX)->reg_name,
+                register_prefix, i.op[op].regs->reg_name, i.suffix);
  #endif
        }
    return 1;
@@ -4921,7 +5676,7 @@ process_operands (void)
         {
           /* The first operand is implicit and must be xmm0.  */
           gas_assert (operand_type_equal (&i.types[0], &regxmm));
-         if (i.op[0].regs->reg_num != 0)
+         if (register_number (i.op[0].regs) != 0)
             return bad_implicit_operand (1);
  
           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
@@ -4991,11 +5746,12 @@ duplicate:
      {
        unsigned int j;
  
-      /* The first operand is implicit and must be xmm0/ymm0.  */
+      /* The first operand is implicit and must be xmm0/ymm0/zmm0.  */
        gas_assert (i.reg_operands
                   && (operand_type_equal (&i.types[0], &regxmm)
-                     || operand_type_equal (&i.types[0], &regymm)));
-      if (i.op[0].regs->reg_num != 0)
+                     || operand_type_equal (&i.types[0], &regymm)
+                     || operand_type_equal (&i.types[0], &regzmm)));
+      if (register_number (i.op[0].regs) != 0)
         return bad_implicit_operand (i.types[0].bitfield.regxmm);
  
        for (j = 1; j < i.operands; j++)
@@ -5148,8 +5904,8 @@ build_modrm_byte (void)
        /* There are 2 kinds of instructions:
           1. 5 operands: 4 register operands or 3 register operands
           plus 1 memory operand plus one Vec_Imm4 operand, VexXDS, and
-         VexW0 or VexW1.  The destination must be either XMM or YMM
-         register.
+         VexW0 or VexW1.  The destination must be either XMM, YMM or
+        ZMM register.
           2. 4 operands: 4 register operands or 3 register operands
           plus 1 memory operand, VexXDS, and VexImmExt  */
        gas_assert ((i.reg_operands == 4
@@ -5161,7 +5917,8 @@ build_modrm_byte (void)
                            && (i.tm.opcode_modifier.vexw == VEXW0
                                || i.tm.opcode_modifier.vexw == VEXW1)
                            && (operand_type_equal (&i.tm.operand_types[dest], &regxmm)
-                              || operand_type_equal (&i.tm.operand_types[dest], &regymm)))));
+                              || operand_type_equal (&i.tm.operand_types[dest], &regymm)
+                              || operand_type_equal (&i.tm.operand_types[dest], &regzmm)))));
  
        if (i.imm_operands == 0)
          {
@@ -5196,13 +5953,13 @@ build_modrm_byte (void)
            gas_assert (operand_type_equal (&i.tm.operand_types[reg_slot],
                                           &regxmm)
                        || operand_type_equal (&i.tm.operand_types[reg_slot],
-                                             &regymm));
+                                             &regymm)
+                      || operand_type_equal (&i.tm.operand_types[reg_slot],
+                                             &regzmm));
            exp->X_op = O_constant;
-          exp->X_add_number
-              = ((i.op[reg_slot].regs->reg_num
-                  + ((i.op[reg_slot].regs->reg_flags & RegRex) ? 8 : 0))
-                << 4);
-        }
+          exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
+         gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
+       }
        else
          {
            unsigned int imm_slot;
@@ -5243,16 +6000,19 @@ build_modrm_byte (void)
            gas_assert (operand_type_equal (&i.tm.operand_types[reg_slot],
                                           &regxmm)
                       || operand_type_equal (&i.tm.operand_types[reg_slot],
-                                            &regymm));
+                                            &regymm)
+                     || operand_type_equal (&i.tm.operand_types[reg_slot],
+                                            &regzmm));
            i.op[imm_slot].imms->X_add_number
-              |= ((i.op[reg_slot].regs->reg_num
-                   + ((i.op[reg_slot].regs->reg_flags & RegRex) ? 8 : 0))
-                 << 4);
+              |= register_number (i.op[reg_slot].regs) << 4;
+         gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
          }
  
        gas_assert (operand_type_equal (&i.tm.operand_types[nds], &regxmm)
                    || operand_type_equal (&i.tm.operand_types[nds],
-                                         &regymm));
+                                         &regymm)
+                  || operand_type_equal (&i.tm.operand_types[nds],
+                                         &regzmm));
        i.vex.register_specifier = i.op[nds].regs;
      }
    else
@@ -5305,7 +6065,8 @@ build_modrm_byte (void)
                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
                           && i.imm_operands == 1
                           && (i.types[0].bitfield.imm8
-                             || i.types[i.operands - 1].bitfield.imm8)));
+                             || i.types[i.operands - 1].bitfield.imm8
+                             || i.rounding)));
           if (i.imm_operands == 2)
             source = 2;
           else
@@ -5317,6 +6078,23 @@ build_modrm_byte (void)
             }
           break;
         case 5:
+         if (i.tm.opcode_modifier.evex)
+           {
+             /* For EVEX instructions, when there are 5 operands, the
+                first one must be immediate operand.  If the second one
+                is immediate operand, the source operand is the 3rd
+                one.  If the last one is immediate operand, the source
+                operand is the 2nd one.  */
+             gas_assert (i.imm_operands == 2
+                         && i.tm.opcode_modifier.sae
+                         && operand_type_check (i.types[0], imm));
+             if (operand_type_check (i.types[1], imm))
+               source = 2;
+             else if (operand_type_check (i.types[4], imm))
+               source = 1;
+             else
+               abort ();
+           }
           break;
         default:
           abort ();
@@ -5326,13 +6104,18 @@ build_modrm_byte (void)
         {
           dest = source + 1;
  
+         /* RC/SAE operand could be between DEST and SRC.  That happens
+            when one operand is GPR and the other one is XMM/YMM/ZMM
+            register.  */
+         if (i.rounding && i.rounding->operand == (int) dest)
+           dest++;
+
           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
             {
-             /* For instructions with VexNDS, the register-only
-                source operand must be 32/64bit integer, XMM or
-                YMM register.  It is encoded in VEX prefix.  We
-                need to clear RegMem bit before calling
-                operand_type_equal.  */
+             /* For instructions with VexNDS, the register-only source
+                operand must be 32/64bit integer, XMM, YMM or ZMM
+                register.  It is encoded in VEX prefix.  We need to
+                clear RegMem bit before calling operand_type_equal.  */
  
               i386_operand_type op;
               unsigned int vvvv;
@@ -5354,7 +6137,9 @@ build_modrm_byte (void)
                   || (op.bitfield.reg32 != 1
                       && !op.bitfield.reg64 != 1
                       && !operand_type_equal (&op, &regxmm)
-                     && !operand_type_equal (&op, &regymm)))
+                     && !operand_type_equal (&op, &regymm)
+                     && !operand_type_equal (&op, &regzmm)
+                     && !operand_type_equal (&op, &regmask)))
                 abort ();
               i.vex.register_specifier = i.op[vvvv].regs;
               dest++;
@@ -5375,8 +6160,12 @@ build_modrm_byte (void)
           i.rm.regmem = i.op[source].regs->reg_num;
           if ((i.op[dest].regs->reg_flags & RegRex) != 0)
             i.rex |= REX_R;
+         if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
+           i.vrex |= REX_R;
           if ((i.op[source].regs->reg_flags & RegRex) != 0)
             i.rex |= REX_B;
+         if ((i.op[source].regs->reg_flags & RegVRex) != 0)
+           i.vrex |= REX_B;
         }
        else
         {
@@ -5384,8 +6173,12 @@ build_modrm_byte (void)
           i.rm.regmem = i.op[dest].regs->reg_num;
           if ((i.op[dest].regs->reg_flags & RegRex) != 0)
             i.rex |= REX_B;
+         if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
+           i.vrex |= REX_B;
           if ((i.op[source].regs->reg_flags & RegRex) != 0)
             i.rex |= REX_R;
+         if ((i.op[source].regs->reg_flags & RegVRex) != 0)
+           i.vrex |= REX_R;
         }
        if (flag_code != CODE_64BIT && (i.rex & (REX_R | REX_B)))
         {
@@ -5410,15 +6203,57 @@ build_modrm_byte (void)
               break;
           gas_assert (op < i.operands);
  
+         if (i.tm.opcode_modifier.vecsib)
+           {
+             if (i.index_reg->reg_num == RegEiz
+                 || i.index_reg->reg_num == RegRiz)
+               abort ();
+
+             i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
+             if (!i.base_reg)
+               {
+                 i.sib.base = NO_BASE_REGISTER;
+                 i.sib.scale = i.log2_scale_factor;
+                 /* No Vec_Disp8 if there is no base.  */
+                 i.types[op].bitfield.vec_disp8 = 0;
+                 i.types[op].bitfield.disp8 = 0;
+                 i.types[op].bitfield.disp16 = 0;
+                 i.types[op].bitfield.disp64 = 0;
+                 if (flag_code != CODE_64BIT)
+                   {
+                     /* Must be 32 bit */
+                     i.types[op].bitfield.disp32 = 1;
+                     i.types[op].bitfield.disp32s = 0;
+                   }
+                 else
+                   {
+                     i.types[op].bitfield.disp32 = 0;
+                     i.types[op].bitfield.disp32s = 1;
+                   }
+               }
+             i.sib.index = i.index_reg->reg_num;
+             if ((i.index_reg->reg_flags & RegRex) != 0)
+               i.rex |= REX_X;
+             if ((i.index_reg->reg_flags & RegVRex) != 0)
+               i.vrex |= REX_X;
+           }
+
           default_seg = &ds;
  
           if (i.base_reg == 0)
             {
               i.rm.mode = 0;
               if (!i.disp_operands)
-               fake_zero_displacement = 1;
+               {
+                 fake_zero_displacement = 1;
+                 /* Instructions with VSIB byte need 32bit displacement
+                    if there is no base register.  */
+                 if (i.tm.opcode_modifier.vecsib)
+                   i.types[op].bitfield.disp32 = 1;
+               }
               if (i.index_reg == 0)
                 {
+                 gas_assert (!i.tm.opcode_modifier.vecsib);
                   /* Operand is just <disp>  */
                   if (flag_code == CODE_64BIT)
                     {
@@ -5444,8 +6279,9 @@ build_modrm_byte (void)
                       i.types[op] = disp32;
                     }
                 }
-             else /* !i.base_reg && i.index_reg  */
+             else if (!i.tm.opcode_modifier.vecsib)
                 {
+                 /* !i.base_reg && i.index_reg  */
                   if (i.index_reg->reg_num == RegEiz
                       || i.index_reg->reg_num == RegRiz)
                     i.sib.index = NO_INDEX_REGISTER;
@@ -5454,6 +6290,8 @@ build_modrm_byte (void)
                   i.sib.base = NO_BASE_REGISTER;
                   i.sib.scale = i.log2_scale_factor;
                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
+                 /* No Vec_Disp8 if there is no base.  */
+                 i.types[op].bitfield.vec_disp8 = 0;
                   i.types[op].bitfield.disp8 = 0;
                   i.types[op].bitfield.disp16 = 0;
                   i.types[op].bitfield.disp64 = 0;
@@ -5476,18 +6314,21 @@ build_modrm_byte (void)
           else if (i.base_reg->reg_num == RegRip ||
                    i.base_reg->reg_num == RegEip)
             {
+             gas_assert (!i.tm.opcode_modifier.vecsib);
               i.rm.regmem = NO_BASE_REGISTER;
               i.types[op].bitfield.disp8 = 0;
               i.types[op].bitfield.disp16 = 0;
               i.types[op].bitfield.disp32 = 0;
               i.types[op].bitfield.disp32s = 1;
               i.types[op].bitfield.disp64 = 0;
+             i.types[op].bitfield.vec_disp8 = 0;
               i.flags[op] |= Operand_PCrel;
               if (! i.disp_operands)
                 fake_zero_displacement = 1;
             }
           else if (i.base_reg->reg_type.bitfield.reg16)
             {
+             gas_assert (!i.tm.opcode_modifier.vecsib);
               switch (i.base_reg->reg_num)
                 {
                 case 3: /* (%bx)  */
@@ -5504,7 +6345,10 @@ build_modrm_byte (void)
                       if (operand_type_check (i.types[op], disp) == 0)
                         {
                           /* fake (%bp) into 0(%bp)  */
-                         i.types[op].bitfield.disp8 = 1;
+                         if (i.tm.operand_types[op].bitfield.vec_disp8)
+                           i.types[op].bitfield.vec_disp8 = 1;
+                         else
+                           i.types[op].bitfield.disp8 = 1;
                           fake_zero_displacement = 1;
                         }
                     }
@@ -5524,6 +6368,8 @@ build_modrm_byte (void)
                   i386_operand_type temp;
                   operand_type_set (&temp, 0);
                   temp.bitfield.disp8 = i.types[op].bitfield.disp8;
+                 temp.bitfield.vec_disp8
+                   = i.types[op].bitfield.vec_disp8;
                   i.types[op] = temp;
                   if (i.prefix[ADDR_PREFIX] == 0)
                     i.types[op].bitfield.disp32s = 1;
@@ -5531,28 +6377,29 @@ build_modrm_byte (void)
                     i.types[op].bitfield.disp32 = 1;
                 }
  
-             i.rm.regmem = i.base_reg->reg_num;
+             if (!i.tm.opcode_modifier.vecsib)
+               i.rm.regmem = i.base_reg->reg_num;
               if ((i.base_reg->reg_flags & RegRex) != 0)
                 i.rex |= REX_B;
               i.sib.base = i.base_reg->reg_num;
               /* x86-64 ignores REX prefix bit here to avoid decoder
                  complications.  */
-             if ((i.base_reg->reg_num & 7) == EBP_REG_NUM)
-               {
+             if (!(i.base_reg->reg_flags & RegRex)
+                 && (i.base_reg->reg_num == EBP_REG_NUM
+                  || i.base_reg->reg_num == ESP_REG_NUM))
                   default_seg = &ss;
-                 if (i.disp_operands == 0)
-                   {
-                     fake_zero_displacement = 1;
-                     i.types[op].bitfield.disp8 = 1;
-                   }
-               }
-             else if (i.base_reg->reg_num == ESP_REG_NUM)
+             if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
                 {
-                 default_seg = &ss;
+                 fake_zero_displacement = 1;
+                 if (i.tm.operand_types [op].bitfield.vec_disp8)
+                   i.types[op].bitfield.vec_disp8 = 1;
+                 else
+                   i.types[op].bitfield.disp8 = 1;
                 }
               i.sib.scale = i.log2_scale_factor;
               if (i.index_reg == 0)
                 {
+                 gas_assert (!i.tm.opcode_modifier.vecsib);
                   /* <disp>(%esp) becomes two byte modrm with no index
                      register.  We've already stored the code for esp
                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
@@ -5560,7 +6407,7 @@ build_modrm_byte (void)
                      extra modrm byte.  */
                   i.sib.index = NO_INDEX_REGISTER;
                 }
-             else
+             else if (!i.tm.opcode_modifier.vecsib)
                 {
                   if (i.index_reg->reg_num == RegEiz
                       || i.index_reg->reg_num == RegRiz)
@@ -5577,7 +6424,19 @@ build_modrm_byte (void)
                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
                 i.rm.mode = 0;
               else
-               i.rm.mode = mode_from_disp_size (i.types[op]);
+               {
+                 if (!fake_zero_displacement
+                     && !i.disp_operands
+                     && i.disp_encoding)
+                   {
+                     fake_zero_displacement = 1;
+                     if (i.disp_encoding == disp_encoding_8bit)
+                       i.types[op].bitfield.disp8 = 1;
+                     else
+                       i.types[op].bitfield.disp32 = 1;
+                   }
+                 i.rm.mode = mode_from_disp_size (i.types[op]);
+               }
             }
  
           if (fake_zero_displacement)
@@ -5662,6 +6521,9 @@ build_modrm_byte (void)
                 || i.types[op].bitfield.regmmx
                 || i.types[op].bitfield.regxmm
                 || i.types[op].bitfield.regymm
+               || i.types[op].bitfield.regbnd
+               || i.types[op].bitfield.regzmm
+               || i.types[op].bitfield.regmask
                 || i.types[op].bitfield.sreg2
                 || i.types[op].bitfield.sreg3
                 || i.types[op].bitfield.control
@@ -5712,7 +6574,7 @@ build_modrm_byte (void)
                   vex_reg = op + 1;
                 }
               else
-               { 
+               {
                   /* There are only 2 operands.  */
                   gas_assert (op < 2 && i.operands == 2);
                   vex_reg = 1;
@@ -5728,7 +6590,9 @@ build_modrm_byte (void)
               if (type->bitfield.reg32 != 1
                   && type->bitfield.reg64 != 1
                   && !operand_type_equal (type, &regxmm)
-                 && !operand_type_equal (type, &regymm))
+                 && !operand_type_equal (type, &regymm)
+                 && !operand_type_equal (type, &regzmm)
+                 && !operand_type_equal (type, &regmask))
                 abort ();
  
               i.vex.register_specifier = i.op[vex_reg].regs;
@@ -5744,12 +6608,16 @@ build_modrm_byte (void)
                   i.rm.regmem = i.op[op].regs->reg_num;
                   if ((i.op[op].regs->reg_flags & RegRex) != 0)
                     i.rex |= REX_B;
+                 if ((i.op[op].regs->reg_flags & RegVRex) != 0)
+                   i.vrex |= REX_B;
                 }
               else
                 {
                   i.rm.reg = i.op[op].regs->reg_num;
                   if ((i.op[op].regs->reg_flags & RegRex) != 0)
                     i.rex |= REX_R;
+                 if ((i.op[op].regs->reg_flags & RegVRex) != 0)
+                   i.vrex |= REX_R;
                 }
             }
  
@@ -5779,7 +6647,7 @@ output_branch (void)
    offsetT off;
  
    code16 = flag_code == CODE_16BIT ? CODE16 : 0;
-  size = i.disp32_encoding ? BIG : SMALL;
+  size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
  
    prefix = 0;
    if (i.prefix[DATA_PREFIX] != 0)
@@ -5801,6 +6669,13 @@ output_branch (void)
        i.prefixes--;
      }
  
+  /* BND prefixed jump.  */
+  if (i.prefix[BND_PREFIX] != 0)
+    {
+      FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
+      i.prefixes -= 1;
+    }
+
    if (i.prefixes != 0 && !intel_syntax)
      as_warn (_("skipping prefixes on this instruction"));
  
@@ -5842,7 +6717,13 @@ output_branch (void)
  
    /* 1 possible extra opcode + 4 byte displacement go in var part.
       Pass reloc in fr_var.  */
-  frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
+  frag_var (rs_machine_dependent, 5,
+           ((!object_64bit
+             || i.reloc[0] != NO_RELOC 
+             || (i.bnd_prefix == NULL && !add_bnd_prefix))
+            ? i.reloc[0]
+            : BFD_RELOC_X86_64_PC32_BND),
+           subtype, sym, off, p);
  }
  
  static void
@@ -5895,14 +6776,33 @@ output_jump (void)
        i.prefixes -= 1;
      }
  
+  /* BND prefixed jump.  */
+  if (i.prefix[BND_PREFIX] != 0)
+    {
+      FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
+      i.prefixes -= 1;
+    }
+
    if (i.prefixes != 0 && !intel_syntax)
      as_warn (_("skipping prefixes on this instruction"));
  
-  p = frag_more (1 + size);
-  *p++ = i.tm.base_opcode;
+  p = frag_more (i.tm.opcode_length + size);
+  switch (i.tm.opcode_length)
+    {
+    case 2:
+      *p++ = i.tm.base_opcode >> 8;
+    case 1:
+      *p++ = i.tm.base_opcode;
+      break;
+    default:
+      abort ();
+    }
  
    fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
-                     i.op[0].disps, 1, reloc (size, 1, 1, i.reloc[0]));
+                     i.op[0].disps, 1, reloc (size, 1, 1,
+                                              (i.bnd_prefix != NULL
+                                               || add_bnd_prefix),
+                                              i.reloc[0]));
  
    /* All jumps handled here are signed, but don't use a signed limit
       check for 32 and 16 bit jumps as we want to allow wrap around at
@@ -5968,7 +6868,7 @@ output_interseg_jump (void)
      }
    else
      fix_new_exp (frag_now, p - frag_now->fr_literal, size,
-                i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
+                i.op[1].imms, 0, reloc (size, 0, 0, 0, i.reloc[1]));
    if (i.op[0].imms->X_op != O_constant)
      as_bad (_("can't handle non absolute segment in `%s'"),
             i.tm.name);
@@ -6005,9 +6905,9 @@ output_insn (void)
        unsigned int j;
        unsigned int prefix;
  
-      /* Since the VEX prefix contains the implicit prefix, we don't
-         need the explicit prefix.  */
-      if (!i.tm.opcode_modifier.vex)
+      /* Since the VEX/EVEX prefix contains the implicit prefix, we
+        don't need the explicit prefix.  */
+      if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
         {
           switch (i.tm.opcode_length)
             {
@@ -6045,8 +6945,7 @@ check_prefix:
             if (*q)
               FRAG_APPEND_1_CHAR (*q);
         }
-
-      if (i.tm.opcode_modifier.vex)
+      else
         {
           for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
             if (*q)
@@ -6065,6 +6964,11 @@ check_prefix:
                   abort ();
                 }
  
+         /* For EVEX instructions i.vrex should become 0 after
+            build_evex_prefix.  For VEX instructions upper 16 registers
+            aren't available, so VREX should be 0.  */
+         if (i.vrex)
+           abort ();
           /* Now the VEX prefix.  */
           p = frag_more (i.vex.length);
           for (j = 0; j < i.vex.length; j++)
@@ -6080,6 +6984,11 @@ check_prefix:
         {
           switch (i.tm.opcode_length)
             {
+           case 4:
+             p = frag_more (4);
+             *p++ = (i.tm.base_opcode >> 24) & 0xff;
+             *p++ = (i.tm.base_opcode >> 16) & 0xff;
+             break;
             case 3:
               p = frag_more (3);
               *p++ = (i.tm.base_opcode >> 16) & 0xff;
@@ -6136,7 +7045,11 @@ static int
  disp_size (unsigned int n)
  {
    int size = 4;
-  if (i.types[n].bitfield.disp64)
+
+  /* Vec_Disp8 has to be 8bit.  */
+  if (i.types[n].bitfield.vec_disp8)
+    size = 1;
+  else if (i.types[n].bitfield.disp64)
      size = 8;
    else if (i.types[n].bitfield.disp8)
      size = 1;
@@ -6168,15 +7081,17 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off)
  
    for (n = 0; n < i.operands; n++)
      {
-      if (operand_type_check (i.types[n], disp))
+      if (i.types[n].bitfield.vec_disp8
+         || operand_type_check (i.types[n], disp))
         {
           if (i.op[n].disps->X_op == O_constant)
             {
               int size = disp_size (n);
-             offsetT val;
+             offsetT val = i.op[n].disps->X_add_number;
  
-             val = offset_in_range (i.op[n].disps->X_add_number,
-                                    size);
+             if (i.types[n].bitfield.vec_disp8)
+               val >>= i.memshift;
+             val = offset_in_range (val, size);
               p = frag_more (size);
               md_number_to_chars (p, val, size);
             }
@@ -6212,7 +7127,10 @@ output_disp (fragS *insn_start_frag, offsetT insn_start_off)
                 }
  
               p = frag_more (size);
-             reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
+             reloc_type = reloc (size, pcrel, sign,
+                                 (i.bnd_prefix != NULL
+                                  || add_bnd_prefix),
+                                 i.reloc[n]);
               if (GOT_symbol
                   && GOT_symbol == i.op[n].disps->X_add_symbol
                   && (((reloc_type == BFD_RELOC_32
@@ -6269,6 +7187,10 @@ output_imm (fragS *insn_start_frag, offsetT insn_start_off)
  
    for (n = 0; n < i.operands; n++)
      {
+      /* Skip SAE/RC Imm operand in EVEX.  They are already handled.  */
+      if (i.rounding && (int) n == i.rounding->operand)
+       continue;
+
        if (operand_type_check (i.types[n], imm))
         {
           if (i.op[n].imms->X_op == O_constant)
@@ -6299,7 +7221,7 @@ output_imm (fragS *insn_start_frag, offsetT insn_start_off)
                 sign = 0;
  
               p = frag_more (size);
-             reloc_type = reloc (size, 0, sign, i.reloc[n]);
+             reloc_type = reloc (size, 0, sign, 0, i.reloc[n]);
  
               /*   This is tough to explain.  We end up with this one if we
                * have operands that look like
@@ -6393,7 +7315,7 @@ void
  x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
                   expressionS *exp)
  {
-  enum bfd_reloc_code_real r = reloc (len, 0, cons_sign, got_reloc);
+  enum bfd_reloc_code_real r = reloc (len, 0, cons_sign, 0, got_reloc);
  
    got_reloc = NO_RELOC;
  
@@ -6408,8 +7330,20 @@ x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
    fix_new_exp (frag, off, len, exp, 0, r);
  }
  
-#if (!defined (OBJ_ELF) && !defined (OBJ_MAYBE_ELF)) || defined (LEX_AT)
-# define lex_got(reloc, adjust, types) NULL
+/* Export the ABI address size for use by TC_ADDRESS_BYTES for the
+   purpose of the `.dc.a' internal pseudo-op.  */
+
+int
+x86_address_bytes (void)
+{
+  if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
+    return 4;
+  return stdoutput->arch_info->bits_per_address / 8;
+}
+
+#if !(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
+    || defined (LEX_AT)
+# define lex_got(reloc, adjust, types, bnd_prefix) NULL
  #else
  /* Parse operands of the form
     <symbol>@GOTOFF+<nnn>
@@ -6423,7 +7357,8 @@ x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
  static char *
  lex_got (enum bfd_reloc_code_real *rel,
          int *adjust,
-        i386_operand_type *types)
+        i386_operand_type *types,
+        int bnd_prefix)
  {
    /* Some of the relocations depend on the size of what field is to
       be relocated.  But in our callers i386_immediate and i386_displacement
@@ -6436,6 +7371,11 @@ lex_got (enum bfd_reloc_code_real *rel,
      const enum bfd_reloc_code_real rel[2];
      const i386_operand_type types64;
    } gotrel[] = {
+#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
+    { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
+                                       BFD_RELOC_SIZE32 },
+      OPERAND_TYPE_IMM32_64 },
+#endif
      { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
                                        BFD_RELOC_X86_64_PLTOFF64 },
        OPERAND_TYPE_IMM64 },
@@ -6491,8 +7431,10 @@ lex_got (enum bfd_reloc_code_real *rel,
    char *cp;
    unsigned int j;
  
+#if defined (OBJ_MAYBE_ELF)
    if (!IS_ELF)
      return NULL;
+#endif
  
    for (cp = input_line_pointer; *cp != '@'; cp++)
      if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
@@ -6509,8 +7451,6 @@ lex_got (enum bfd_reloc_code_real *rel,
               char *tmpbuf, *past_reloc;
  
               *rel = gotrel[j].rel[object_64bit];
-             if (adjust)
-               *adjust = len;
  
               if (types)
                 {
@@ -6523,7 +7463,7 @@ lex_got (enum bfd_reloc_code_real *rel,
                     *types = gotrel[j].types64;
                 }
  
-             if (GOT_symbol == NULL)
+             if (j != 0 && GOT_symbol == NULL)
                 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
  
               /* The length of the first part of our input line.  */
@@ -6545,8 +7485,16 @@ lex_got (enum bfd_reloc_code_real *rel,
                 /* Replace the relocation token with ' ', so that
                    errors like foo@GOTOFF1 will be detected.  */
                 tmpbuf[first++] = ' ';
+             else
+               /* Increment length by 1 if the relocation token is
+                  removed.  */
+               len++;
+             if (adjust)
+               *adjust = len;
               memcpy (tmpbuf + first, past_reloc, second);
               tmpbuf[first + second] = '\0';
+             if (bnd_prefix && *rel == BFD_RELOC_X86_64_PLT32)
+               *rel = BFD_RELOC_X86_64_PLT32_BND;
               return tmpbuf;
             }
  
@@ -6559,28 +7507,133 @@ lex_got (enum bfd_reloc_code_real *rel,
    /* Might be a symbol version string.  Don't as_bad here.  */
    return NULL;
  }
+#endif
  
-void
-x86_cons (expressionS *exp, int size)
+#ifdef TE_PE
+#ifdef lex_got
+#undef lex_got
+#endif
+/* Parse operands of the form
+   <symbol>@SECREL32+<nnn>
+
+   If we find one, set up the correct relocation in RELOC and copy the
+   input string, minus the `@SECREL32' into a malloc'd buffer for
+   parsing by the calling routine.  Return this buffer, and if ADJUST
+   is non-null set it to the length of the string we removed from the
+   input line.  Otherwise return NULL.
+
+   This function is copied from the ELF version above adjusted for PE targets.  */
+
+static char *
+lex_got (enum bfd_reloc_code_real *rel ATTRIBUTE_UNUSED,
+        int *adjust ATTRIBUTE_UNUSED,
+        i386_operand_type *types,
+        int bnd_prefix ATTRIBUTE_UNUSED)
  {
-  intel_syntax = -intel_syntax;
+  static const struct
+  {
+    const char *str;
+    int len;
+    const enum bfd_reloc_code_real rel[2];
+    const i386_operand_type types64;
+  }
+  gotrel[] =
+  {
+    { STRING_COMMA_LEN ("SECREL32"),    { BFD_RELOC_32_SECREL,
+                                         BFD_RELOC_32_SECREL },
+      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
+  };
  
-  exp->X_md = 0;
-  if (size == 4 || (object_64bit && size == 8))
-    {
-      /* Handle @GOTOFF and the like in an expression.  */
-      char *save;
-      char *gotfree_input_line;
-      int adjust;
+  char *cp;
+  unsigned j;
  
-      save = input_line_pointer;
-      gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
-      if (gotfree_input_line)
-       input_line_pointer = gotfree_input_line;
+  for (cp = input_line_pointer; *cp != '@'; cp++)
+    if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
+      return NULL;
  
-      expression (exp);
+  for (j = 0; j < ARRAY_SIZE (gotrel); j++)
+    {
+      int len = gotrel[j].len;
  
-      if (gotfree_input_line)
+      if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
+       {
+         if (gotrel[j].rel[object_64bit] != 0)
+           {
+             int first, second;
+             char *tmpbuf, *past_reloc;
+
+             *rel = gotrel[j].rel[object_64bit];
+             if (adjust)
+               *adjust = len;
+
+             if (types)
+               {
+                 if (flag_code != CODE_64BIT)
+                   {
+                     types->bitfield.imm32 = 1;
+                     types->bitfield.disp32 = 1;
+                   }
+                 else
+                   *types = gotrel[j].types64;
+               }
+
+             /* The length of the first part of our input line.  */
+             first = cp - input_line_pointer;
+
+             /* The second part goes from after the reloc token until
+                (and including) an end_of_line char or comma.  */
+             past_reloc = cp + 1 + len;
+             cp = past_reloc;
+             while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
+               ++cp;
+             second = cp + 1 - past_reloc;
+
+             /* Allocate and copy string.  The trailing NUL shouldn't
+                be necessary, but be safe.  */
+             tmpbuf = (char *) xmalloc (first + second + 2);
+             memcpy (tmpbuf, input_line_pointer, first);
+             if (second != 0 && *past_reloc != ' ')
+               /* Replace the relocation token with ' ', so that
+                  errors like foo@SECREL321 will be detected.  */
+               tmpbuf[first++] = ' ';
+             memcpy (tmpbuf + first, past_reloc, second);
+             tmpbuf[first + second] = '\0';
+             return tmpbuf;
+           }
+
+         as_bad (_("@%s reloc is not supported with %d-bit output format"),
+                 gotrel[j].str, 1 << (5 + object_64bit));
+         return NULL;
+       }
+    }
+
+  /* Might be a symbol version string.  Don't as_bad here.  */
+  return NULL;
+}
+
+#endif /* TE_PE */
+
+void
+x86_cons (expressionS *exp, int size)
+{
+  intel_syntax = -intel_syntax;
+
+  exp->X_md = 0;
+  if (size == 4 || (object_64bit && size == 8))
+    {
+      /* Handle @GOTOFF and the like in an expression.  */
+      char *save;
+      char *gotfree_input_line;
+      int adjust = 0;
+
+      save = input_line_pointer;
+      gotfree_input_line = lex_got (&got_reloc, &adjust, NULL, 0);
+      if (gotfree_input_line)
+       input_line_pointer = gotfree_input_line;
+
+      expression (exp);
+
+      if (gotfree_input_line)
         {
           /* expression () has merrily parsed up to the end of line,
              or a comma - in the wrong buffer.  Transfer how far
@@ -6610,7 +7663,6 @@ x86_cons (expressionS *exp, int size)
    if (intel_syntax)
      i386_intel_simplify (exp);
  }
-#endif
  
  static void
  signed_cons (int size)
@@ -6623,8 +7675,7 @@ signed_cons (int size)
  
  #ifdef TE_PE
  static void
-pe_directive_secrel (dummy)
-     int dummy ATTRIBUTE_UNUSED;
+pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
  {
    expressionS exp;
  
@@ -6643,6 +7694,140 @@ pe_directive_secrel (dummy)
  }
  #endif
  
+/* Handle Vector operations.  */
+
+static char *
+check_VecOperations (char *op_string, char *op_end)
+{
+  const reg_entry *mask;
+  const char *saved;
+  char *end_op;
+
+  while (*op_string
+        && (op_end == NULL || op_string < op_end))
+    {
+      saved = op_string;
+      if (*op_string == '{')
+       {
+         op_string++;
+
+         /* Check broadcasts.  */
+         if (strncmp (op_string, "1to", 3) == 0)
+           {
+             int bcst_type;
+
+             if (i.broadcast)
+               goto duplicated_vec_op;
+
+             op_string += 3;
+             if (*op_string == '8')
+               bcst_type = BROADCAST_1TO8;
+             else if (*op_string == '1'
+                      && *(op_string+1) == '6')
+               {
+                 bcst_type = BROADCAST_1TO16;
+                 op_string++;
+               }
+             else
+               {
+                 as_bad (_("Unsupported broadcast: `%s'"), saved);
+                 return NULL;
+               }
+             op_string++;
+
+             broadcast_op.type = bcst_type;
+             broadcast_op.operand = this_operand;
+             i.broadcast = &broadcast_op;
+           }
+         /* Check masking operation.  */
+         else if ((mask = parse_register (op_string, &end_op)) != NULL)
+           {
+             /* k0 can't be used for write mask.  */
+             if (mask->reg_num == 0)
+               {
+                 as_bad (_("`%s' can't be used for write mask"),
+                         op_string);
+                 return NULL;
+               }
+
+             if (!i.mask)
+               {
+                 mask_op.mask = mask;
+                 mask_op.zeroing = 0;
+                 mask_op.operand = this_operand;
+                 i.mask = &mask_op;
+               }
+             else
+               {
+                 if (i.mask->mask)
+                   goto duplicated_vec_op;
+
+                 i.mask->mask = mask;
+
+                 /* Only "{z}" is allowed here.  No need to check
+                    zeroing mask explicitly.  */
+                 if (i.mask->operand != this_operand)
+                   {
+                     as_bad (_("invalid write mask `%s'"), saved);
+                     return NULL;
+                   }
+               }
+
+             op_string = end_op;
+           }
+         /* Check zeroing-flag for masking operation.  */
+         else if (*op_string == 'z')
+           {
+             if (!i.mask)
+               {
+                 mask_op.mask = NULL;
+                 mask_op.zeroing = 1;
+                 mask_op.operand = this_operand;
+                 i.mask = &mask_op;
+               }
+             else
+               {
+                 if (i.mask->zeroing)
+                   {
+                   duplicated_vec_op:
+                     as_bad (_("duplicated `%s'"), saved);
+                     return NULL;
+                   }
+
+                 i.mask->zeroing = 1;
+
+                 /* Only "{%k}" is allowed here.  No need to check mask
+                    register explicitly.  */
+                 if (i.mask->operand != this_operand)
+                   {
+                     as_bad (_("invalid zeroing-masking `%s'"),
+                             saved);
+                     return NULL;
+                   }
+               }
+
+             op_string++;
+           }
+         else
+           goto unknown_vec_op;
+
+         if (*op_string != '}')
+           {
+             as_bad (_("missing `}' in `%s'"), saved);
+             return NULL;
+           }
+         op_string++;
+         continue;
+       }
+    unknown_vec_op:
+      /* We don't know this one.  */
+      as_bad (_("unknown vector operation: `%s'"), saved);
+      return NULL;
+    }
+
+  return op_string;
+}
+
  static int
  i386_immediate (char *imm_start)
  {
@@ -6670,13 +7855,25 @@ i386_immediate (char *imm_start)
    save_input_line_pointer = input_line_pointer;
    input_line_pointer = imm_start;
  
-  gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
+  gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types,
+                               (i.bnd_prefix != NULL
+                                || add_bnd_prefix));
    if (gotfree_input_line)
      input_line_pointer = gotfree_input_line;
  
    exp_seg = expression (exp);
  
    SKIP_WHITESPACE ();
+
+  /* Handle vector operations.  */
+  if (*input_line_pointer == '{')
+    {
+      input_line_pointer = check_VecOperations (input_line_pointer,
+                                               NULL);
+      if (input_line_pointer == NULL)
+       return 0;
+    }
+
    if (*input_line_pointer)
      as_bad (_("junk `%s' after expression"), input_line_pointer);
  
@@ -6917,7 +8114,9 @@ i386_displacement (char *disp_start, char *disp_end)
        *displacement_string_end = '0';
      }
  #endif
-  gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
+  gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types,
+                               (i.bnd_prefix != NULL
+                                || add_bnd_prefix));
    if (gotfree_input_line)
      input_line_pointer = gotfree_input_line;
  
@@ -7040,14 +8239,55 @@ i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
  static int
  i386_index_check (const char *operand_string)
  {
-  int ok;
    const char *kind = "base/index";
+  enum flag_code addr_mode;
+
+  if (i.prefix[ADDR_PREFIX])
+    addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
+  else
+    {
+      addr_mode = flag_code;
+
  #if INFER_ADDR_PREFIX
-  int fudged = 0;
+      if (i.mem_operands == 0)
+       {
+         /* Infer address prefix from the first memory operand.  */
+         const reg_entry *addr_reg = i.base_reg;
+
+         if (addr_reg == NULL)
+           addr_reg = i.index_reg;
  
- tryprefix:
+         if (addr_reg)
+           {
+             if (addr_reg->reg_num == RegEip
+                 || addr_reg->reg_num == RegEiz
+                 || addr_reg->reg_type.bitfield.reg32)
+               addr_mode = CODE_32BIT;
+             else if (flag_code != CODE_64BIT
+                      && addr_reg->reg_type.bitfield.reg16)
+               addr_mode = CODE_16BIT;
+
+             if (addr_mode != flag_code)
+               {
+                 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
+                 i.prefixes += 1;
+                 /* Change the size of any displacement too.  At most one
+                    of Disp16 or Disp32 is set.
+                    FIXME.  There doesn't seem to be any real need for
+                    separate Disp16 and Disp32 flags.  The same goes for
+                    Imm16 and Imm32.  Removing them would probably clean
+                    up the code quite a lot.  */
+                 if (flag_code != CODE_64BIT
+                     && (i.types[this_operand].bitfield.disp16
+                         || i.types[this_operand].bitfield.disp32))
+                   i.types[this_operand]
+                     = operand_type_xor (i.types[this_operand], disp16_32);
+               }
+           }
+       }
  #endif
-  ok = 1;
+    }
+
    if (current_templates->start->opcode_modifier.isstring
        && !current_templates->start->opcode_modifier.immext
        && (current_templates->end[-1].opcode_modifier.isstring
@@ -7055,7 +8295,14 @@ i386_index_check (const char *operand_string)
      {
        /* Memory operands of string insns are special in that they only allow
          a single register (rDI, rSI, or rBX) as their memory address.  */
-      unsigned int expected;
+      const reg_entry *expected_reg;
+      static const char *di_si[][2] =
+       {
+         { "esi", "edi" },
+         { "si", "di" },
+         { "rsi", "rdi" }
+       };
+      static const char *bx[] = { "ebx", "bx", "rbx" };
  
        kind = "string address";
  
@@ -7068,75 +8315,71 @@ i386_index_check (const char *operand_string)
                   && current_templates->end[-1].operand_types[1]
                      .bitfield.baseindex))
             type = current_templates->end[-1].operand_types[1];
-         expected = type.bitfield.esseg ? 7 /* rDI */ : 6 /* rSI */;
+         expected_reg = hash_find (reg_hash,
+                                   di_si[addr_mode][type.bitfield.esseg]);
+
         }
        else
-       expected = 3 /* rBX */;
+       expected_reg = hash_find (reg_hash, bx[addr_mode]);
  
-      if (!i.base_reg || i.index_reg
+      if (i.base_reg != expected_reg
+         || i.index_reg
           || operand_type_check (i.types[this_operand], disp))
-       ok = -1;
-      else if (!(flag_code == CODE_64BIT
-                ? i.prefix[ADDR_PREFIX]
-                  ? i.base_reg->reg_type.bitfield.reg32
-                  : i.base_reg->reg_type.bitfield.reg64
-                : (flag_code == CODE_16BIT) ^ !i.prefix[ADDR_PREFIX]
-                  ? i.base_reg->reg_type.bitfield.reg32
-                  : i.base_reg->reg_type.bitfield.reg16))
-       ok = 0;
-      else if (i.base_reg->reg_num != expected)
-       ok = -1;
-
-      if (ok < 0)
-       {
-         unsigned int j;
-
-         for (j = 0; j < i386_regtab_size; ++j)
-           if ((flag_code == CODE_64BIT
-                ? i.prefix[ADDR_PREFIX]
-                  ? i386_regtab[j].reg_type.bitfield.reg32
-                  : i386_regtab[j].reg_type.bitfield.reg64
-                : (flag_code == CODE_16BIT) ^ !i.prefix[ADDR_PREFIX]
-                  ? i386_regtab[j].reg_type.bitfield.reg32
-                  : i386_regtab[j].reg_type.bitfield.reg16)
-               && i386_regtab[j].reg_num == expected)
-             break;
-         gas_assert (j < i386_regtab_size);
+       {
+         /* The second memory operand must have the same size as
+            the first one.  */
+         if (i.mem_operands
+             && i.base_reg
+             && !((addr_mode == CODE_64BIT
+                   && i.base_reg->reg_type.bitfield.reg64)
+                  || (addr_mode == CODE_32BIT
+                      ? i.base_reg->reg_type.bitfield.reg32
+                      : i.base_reg->reg_type.bitfield.reg16)))
+           goto bad_address;
+
           as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
                    operand_string,
                    intel_syntax ? '[' : '(',
                    register_prefix,
-                  i386_regtab[j].reg_name,
+                  expected_reg->reg_name,
                    intel_syntax ? ']' : ')');
-         ok = 1;
-       }
-    }
-  else if (flag_code == CODE_64BIT)
-    {
-      if ((i.base_reg
-          && ((i.prefix[ADDR_PREFIX] == 0
-               && !i.base_reg->reg_type.bitfield.reg64)
-              || (i.prefix[ADDR_PREFIX]
-                  && !i.base_reg->reg_type.bitfield.reg32))
-          && (i.index_reg
-              || i.base_reg->reg_num !=
-                 (i.prefix[ADDR_PREFIX] == 0 ? RegRip : RegEip)))
-         || (i.index_reg
-             && (!i.index_reg->reg_type.bitfield.baseindex
-                 || (i.prefix[ADDR_PREFIX] == 0
-                     && i.index_reg->reg_num != RegRiz
-                     && !i.index_reg->reg_type.bitfield.reg64
-                     )
-                 || (i.prefix[ADDR_PREFIX]
-                     && i.index_reg->reg_num != RegEiz
-                     && !i.index_reg->reg_type.bitfield.reg32))))
-       ok = 0;
+         return 1;
+       }
+      else
+       return 1;
+
+bad_address:
+      as_bad (_("`%s' is not a valid %s expression"),
+             operand_string, kind);
+      return 0;
      }
    else
      {
-      if ((flag_code == CODE_16BIT) ^ (i.prefix[ADDR_PREFIX] != 0))
+      if (addr_mode != CODE_16BIT)
+       {
+         /* 32-bit/64-bit checks.  */
+         if ((i.base_reg
+              && (addr_mode == CODE_64BIT
+                  ? !i.base_reg->reg_type.bitfield.reg64
+                  : !i.base_reg->reg_type.bitfield.reg32)
+              && (i.index_reg
+                  || (i.base_reg->reg_num
+                      != (addr_mode == CODE_64BIT ? RegRip : RegEip))))
+             || (i.index_reg
+                 && !i.index_reg->reg_type.bitfield.regxmm
+                 && !i.index_reg->reg_type.bitfield.regymm
+                 && !i.index_reg->reg_type.bitfield.regzmm
+                 && ((addr_mode == CODE_64BIT
+                      ? !(i.index_reg->reg_type.bitfield.reg64
+                          || i.index_reg->reg_num == RegRiz)
+                      : !(i.index_reg->reg_type.bitfield.reg32
+                          || i.index_reg->reg_num == RegEiz))
+                     || !i.index_reg->reg_type.bitfield.baseindex)))
+           goto bad_address;
+       }
+      else
         {
-         /* 16bit checks.  */
+         /* 16-bit checks.  */
           if ((i.base_reg
                && (!i.base_reg->reg_type.bitfield.reg16
                    || !i.base_reg->reg_type.bitfield.baseindex))
@@ -7147,56 +8390,71 @@ i386_index_check (const char *operand_string)
                            && i.base_reg->reg_num < 6
                            && i.index_reg->reg_num >= 6
                            && i.log2_scale_factor == 0))))
-           ok = 0;
+           goto bad_address;
         }
-      else
+    }
+  return 1;
+}
+
+/* Handle vector immediates.
+
+   Try to parse IMM_START as an RC/SAE immediate: a `{', then one of
+   the names in RC_NamesTable (compared over that entry's len
+   characters), then `}' and nothing else.
+
+   On success, store the matched entry's type and the current operand
+   number in rc_op, point i.rounding at rc_op, take the next slot of
+   im_expressions (incrementing i.imm_operands), make it a constant
+   zero expression attached to the current operand, set the imm8 bit
+   in the operand's type and return 1.
+
+   Return 0 without a diagnostic if IMM_START does not start with `{'
+   or if no table name matches.  Return 0 after calling as_bad if
+   i.rounding is already set, if the closing `}' is missing, or if
+   anything follows the `}'.  */
+
+static int
+RC_SAE_immediate (const char *imm_start)
+{
+  unsigned int match_found, j;
+  const char *pstr = imm_start;
+  expressionS *exp;
+
+  if (*pstr != '{')
+    return 0;
+
+  pstr++;  /* Step over the opening `{'.  */
+  match_found = 0;
+  for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
+    {
+      if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
         {
-         /* 32bit checks.  */
-         if ((i.base_reg
-              && !i.base_reg->reg_type.bitfield.reg32)
-             || (i.index_reg
-                 && ((!i.index_reg->reg_type.bitfield.reg32
-                      && i.index_reg->reg_num != RegEiz)
-                     || !i.index_reg->reg_type.bitfield.baseindex)))
-           ok = 0;
+         if (!i.rounding)
+           {
+             rc_op.type = RC_NamesTable[j].type;
+             rc_op.operand = this_operand;
+             i.rounding = &rc_op;
+           }
+         else
+           {
+             as_bad (_("duplicated `%s'"), imm_start);
+             return 0;
+           }
+         pstr += RC_NamesTable[j].len;  /* Step over the matched name.  */
+         match_found = 1;
+         break;
         }
     }
-  if (!ok)
+  if (!match_found)
+    return 0;
+
+  if (*pstr++ != '}')
     {
-#if INFER_ADDR_PREFIX
-      if (!i.mem_operands && !i.prefix[ADDR_PREFIX])
-       {
-         i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
-         i.prefixes += 1;
-         /* Change the size of any displacement too.  At most one of
-            Disp16 or Disp32 is set.
-            FIXME.  There doesn't seem to be any real need for separate
-            Disp16 and Disp32 flags.  The same goes for Imm16 and Imm32.
-            Removing them would probably clean up the code quite a lot.  */
-         if (flag_code != CODE_64BIT
-             && (i.types[this_operand].bitfield.disp16
-                 || i.types[this_operand].bitfield.disp32))
-           i.types[this_operand]
-             = operand_type_xor (i.types[this_operand], disp16_32);
-         fudged = 1;
-         goto tryprefix;
-       }
-      if (fudged)
-       as_bad (_("`%s' is not a valid %s expression"),
-               operand_string,
-               kind);
-      else
-#endif
-       as_bad (_("`%s' is not a valid %s-bit %s expression"),
-               operand_string,
-               flag_code_names[i.prefix[ADDR_PREFIX]
-                                        ? flag_code == CODE_32BIT
-                                          ? CODE_16BIT
-                                          : CODE_32BIT
-                                        : flag_code],
-               kind);
+      as_bad (_("Missing '}': '%s'"), imm_start);
+      return 0;
+    }
+  /* The RC/SAE immediate string must end right after the `}'.  */;
+  if (*pstr != 0)
+    {
+      as_bad (_("Junk after '}': '%s'"), imm_start);
+      return 0;
     }
-  return ok;
+
+  exp = &im_expressions[i.imm_operands++];
+  i.op[this_operand].imms = exp;
+
+  exp->X_op = O_constant;
+  exp->X_add_number = 0;
+  exp->X_add_symbol = (symbolS *) 0;
+  exp->X_op_symbol = (symbolS *) 0;
+
+  i.types[this_operand].bitfield.imm8 = 1;
+  return 1;
  }
  
  /* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
@@ -7281,6 +8539,15 @@ i386_att_operand (char *operand_string)
             }
           goto do_memory_reference;
         }
+
+      /* Handle vector operations.  */
+      if (*op_string == '{')
+       {
+         op_string = check_VecOperations (op_string, NULL);
+         if (op_string == NULL)
+           return 0;
+       }
+
        if (*op_string)
         {
           as_bad (_("junk `%s' after register"), op_string);
@@ -7310,6 +8577,11 @@ i386_att_operand (char *operand_string)
        if (!i386_immediate (op_string))
         return 0;
      }
+  else if (RC_SAE_immediate (operand_string))
+    {
+      /* If it is a RC or SAE immediate, do nothing.  */
+      ;
+    }
    else if (is_digit_char (*op_string)
            || is_identifier_char (*op_string)
            || *op_string == '(')
@@ -7320,6 +8592,7 @@ i386_att_operand (char *operand_string)
        /* Start and end of displacement string expression (if found).  */
        char *displacement_string_start;
        char *displacement_string_end;
+      char *vop_start;
  
      do_memory_reference:
        if ((i.mem_operands == 1
@@ -7337,6 +8610,15 @@ i386_att_operand (char *operand_string)
          after the '('.  */
        base_string = op_string + strlen (op_string);
  
+      /* Handle vector operations.  */
+      vop_start = strchr (op_string, '{');
+      if (vop_start && vop_start < base_string)
+       {
+         if (check_VecOperations (vop_start, base_string) == NULL)
+           return 0;
+         base_string = vop_start;
+       }
+
        --base_string;
        if (is_space_char (*base_string))
         --base_string;
@@ -7412,6 +8694,9 @@ i386_att_operand (char *operand_string)
                     }
                   else if (*base_string == REGISTER_PREFIX)
                     {
+                     end_op = strchr (base_string, ',');
+                     if (end_op)
+                       *end_op = '\0';
                       as_bad (_("bad register name `%s'"), base_string);
                       return 0;
                     }
@@ -7453,6 +8738,9 @@ i386_att_operand (char *operand_string)
             }
           else if (*base_string == REGISTER_PREFIX)
             {
+             end_op = strchr (base_string, ',');
+             if (end_op)
+               *end_op = '\0';
               as_bad (_("bad register name `%s'"), base_string);
               return 0;
             }
@@ -7498,6 +8786,18 @@ i386_att_operand (char *operand_string)
    return 1;                    /* Normal return.  */
  }
  \f
+/* Calculate the maximum variable size (i.e., excluding fr_fix)
+   that an rs_machine_dependent frag may reach.
+
+   FRAG must have fr_type rs_machine_dependent; this is enforced with
+   gas_assert.  The result is 4 when the relax-state type decoded from
+   FRAG->fr_subtype is UNCOND_JUMP, and 5 for every other type.  */
+
+unsigned int
+i386_frag_max_var (fragS *frag)
+{
+  /* The only relaxable frags are for jumps.
+     Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
+  gas_assert (frag->fr_type == rs_machine_dependent);
+  return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
+}
+
  /* md_estimate_size_before_relax()
  
     Called just before relax() for rs_machine_dependent frags.  The x86
@@ -7512,9 +8812,7 @@ i386_att_operand (char *operand_string)
     returned value.  */
  
  int
-md_estimate_size_before_relax (fragP, segment)
-     fragS *fragP;
-     segT segment;
+md_estimate_size_before_relax (fragS *fragP, segT segment)
  {
    /* We've already got fragP->fr_subtype right;  all we have to do is
       check for un-relaxable symbols.  On an ELF system, we can't relax
@@ -7637,10 +8935,8 @@ md_estimate_size_before_relax (fragP, segment)
         Caller will turn frag into a ".space 0".  */
  
  void
-md_convert_frag (abfd, sec, fragP)
-     bfd *abfd ATTRIBUTE_UNUSED;
-     segT sec ATTRIBUTE_UNUSED;
-     fragS *fragP;
+md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
+                 fragS *fragP)
  {
    unsigned char *opcode;
    unsigned char *where_to_put_displacement = NULL;
@@ -7737,21 +9033,17 @@ md_convert_frag (abfd, sec, fragP)
    fragP->fr_fix += extension;
  }
  \f
-/* Apply a fixup (fixS) to segment data, once it has been determined
+/* Apply a fixup (fixP) to segment data, once it has been determined
     by our caller that we have all the info we need to fix it up.
  
+   Parameter valP is the pointer to the value of the bits.
+
     On the 386, immediates, displacements, and data pointers are all in
     the same (little-endian) format, so we don't need to care about which
     we are handling.  */
  
  void
-md_apply_fix (fixP, valP, seg)
-     /* The fix we're to put in.  */
-     fixS *fixP;
-     /* Pointer to the value of the bits.  */
-     valueT *valP;
-     /* Segment fix is from.  */
-     segT seg ATTRIBUTE_UNUSED;
+md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
  {
    char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
    valueT value = *valP;
@@ -7784,7 +9076,8 @@ md_apply_fix (fixP, valP, seg)
        && (fixP->fx_r_type == BFD_RELOC_32_PCREL
           || fixP->fx_r_type == BFD_RELOC_64_PCREL
           || fixP->fx_r_type == BFD_RELOC_16_PCREL
-         || fixP->fx_r_type == BFD_RELOC_8_PCREL)
+         || fixP->fx_r_type == BFD_RELOC_8_PCREL
+         || fixP->fx_r_type == BFD_RELOC_X86_64_PC32_BND)
        && !use_rela_relocations)
      {
        /* This is a hack.  There should be a better way to handle this.
@@ -7840,6 +9133,7 @@ md_apply_fix (fixP, valP, seg)
        {
        case BFD_RELOC_386_PLT32:
        case BFD_RELOC_X86_64_PLT32:
+      case BFD_RELOC_X86_64_PLT32_BND:
         /* Make the jump instruction point to the address of the operand.  At
            runtime we merely add the offset to the actual PLT entry.  */
         value = -4;
@@ -8030,11 +9324,26 @@ parse_real_register (char *reg_string, char **end_op)
    if (r->reg_type.bitfield.regymm && !cpu_arch_flags.bitfield.cpuavx)
      return (const reg_entry *) NULL;
  
+  if ((r->reg_type.bitfield.regzmm || r->reg_type.bitfield.regmask)
+       && !cpu_arch_flags.bitfield.cpuavx512f)
+    return (const reg_entry *) NULL;
+
    /* Don't allow fake index register unless allow_index_reg isn't 0. */
    if (!allow_index_reg
        && (r->reg_num == RegEiz || r->reg_num == RegRiz))
      return (const reg_entry *) NULL;
  
+  /* The upper 16 vector registers are only available with VREX in
+     64bit mode.  */
+  if ((r->reg_flags & RegVRex))
+    {
+      if (!cpu_arch_flags.bitfield.cpuvrex
+         || flag_code != CODE_64BIT)
+       return (const reg_entry *) NULL;
+
+      i.need_vrex = 1;
+    }
+
    if (((r->reg_flags & (RegRex64 | RegRex))
         || r->reg_type.bitfield.reg64)
        && (!cpu_arch_flags.bitfield.cpulm
@@ -8163,14 +9472,18 @@ const char *md_shortopts = "qn";
  #define OPTION_MOLD_GCC (OPTION_MD_BASE + 9)
  #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
  #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
-#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 12)
-#define OPTION_X32 (OPTION_MD_BASE + 13)
+#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
+#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
+#define OPTION_X32 (OPTION_MD_BASE + 14)
+#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
+#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
+#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
  
  struct option md_longopts[] =
  {
    {"32", no_argument, NULL, OPTION_32},
  #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
-     || defined (TE_PE) || defined (TE_PEP))
+     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
    {"64", no_argument, NULL, OPTION_64},
  #endif
  #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
@@ -8186,7 +9499,11 @@ struct option md_longopts[] =
    {"mold-gcc", no_argument, NULL, OPTION_MOLD_GCC},
    {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
    {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
+  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
    {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
+  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
+  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
+  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
    {NULL, no_argument, NULL, 0}
  };
  size_t md_longopts_size = sizeof (md_longopts);
@@ -8228,7 +9545,7 @@ md_parse_option (int c, char *arg)
        break;
  #endif
  #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
-     || defined (TE_PE) || defined (TE_PEP))
+     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
      case OPTION_64:
        {
         const char **list, **l;
@@ -8238,13 +9555,14 @@ md_parse_option (int c, char *arg)
           if (CONST_STRNEQ (*l, "elf64-x86-64")
               || strcmp (*l, "coff-x86-64") == 0
               || strcmp (*l, "pe-x86-64") == 0
-             || strcmp (*l, "pei-x86-64") == 0)
+             || strcmp (*l, "pei-x86-64") == 0
+             || strcmp (*l, "mach-o-x86-64") == 0)
             {
               default_arch = "x86_64";
               break;
             }
         if (*l == NULL)
-         as_fatal (_("No compiled in support for x86_64"));
+         as_fatal (_("no compiled in support for x86_64"));
         free (list);
        }
        break;
@@ -8264,7 +9582,7 @@ md_parse_option (int c, char *arg)
                 break;
               }
           if (*l == NULL)
-           as_fatal (_("No compiled in support for 32bit x86_64"));
+           as_fatal (_("no compiled in support for 32bit x86_64"));
           free (list);
         }
        else
@@ -8298,7 +9616,7 @@ md_parse_option (int c, char *arg)
        do
         {
           if (*arch == '.')
-           as_fatal (_("Invalid -march= option: `%s'"), arg);
+           as_fatal (_("invalid -march= option: `%s'"), arg);
           next = strchr (arch, '+');
           if (next)
             *next++ = '\0';
@@ -8347,13 +9665,14 @@ md_parse_option (int c, char *arg)
                       else
                         cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
                       cpu_arch_flags = flags;
+                     cpu_arch_isa_flags = flags;
                     }
                   break;
                 }
             }
  
           if (j >= ARRAY_SIZE (cpu_arch))
-           as_fatal (_("Invalid -march= option: `%s'"), arg);
+           as_fatal (_("invalid -march= option: `%s'"), arg);
  
           arch = next;
         }
@@ -8362,7 +9681,7 @@ md_parse_option (int c, char *arg)
  
      case OPTION_MTUNE:
        if (*arg == '.')
-       as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+       as_fatal (_("invalid -mtune= option: `%s'"), arg);
        for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
         {
           if (strcmp (arg, cpu_arch [j].name) == 0)
@@ -8374,7 +9693,7 @@ md_parse_option (int c, char *arg)
             }
         }
        if (j >= ARRAY_SIZE (cpu_arch))
-       as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+       as_fatal (_("invalid -mtune= option: `%s'"), arg);
        break;
  
      case OPTION_MMNEMONIC:
@@ -8383,7 +9702,7 @@ md_parse_option (int c, char *arg)
        else if (strcasecmp (arg, "intel") == 0)
         intel_mnemonic = 1;
        else
-       as_fatal (_("Invalid -mmnemonic= option: `%s'"), arg);
+       as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
        break;
  
      case OPTION_MSYNTAX:
@@ -8392,7 +9711,7 @@ md_parse_option (int c, char *arg)
        else if (strcasecmp (arg, "intel") == 0)
         intel_syntax = 1;
        else
-       as_fatal (_("Invalid -msyntax= option: `%s'"), arg);
+       as_fatal (_("invalid -msyntax= option: `%s'"), arg);
        break;
  
      case OPTION_MINDEX_REG:
@@ -8413,13 +9732,24 @@ md_parse_option (int c, char *arg)
  
      case OPTION_MSSE_CHECK:
        if (strcasecmp (arg, "error") == 0)
-       sse_check = sse_check_error;
+       sse_check = check_error;
+      else if (strcasecmp (arg, "warning") == 0)
+       sse_check = check_warning;
+      else if (strcasecmp (arg, "none") == 0)
+       sse_check = check_none;
+      else
+       as_fatal (_("invalid -msse-check= option: `%s'"), arg);
+      break;
+
+    case OPTION_MOPERAND_CHECK:
+      if (strcasecmp (arg, "error") == 0)
+       operand_check = check_error;
        else if (strcasecmp (arg, "warning") == 0)
-       sse_check = sse_check_warning;
+       operand_check = check_warning;
        else if (strcasecmp (arg, "none") == 0)
-       sse_check = sse_check_none;
+       operand_check = check_none;
        else
-       as_fatal (_("Invalid -msse-check= option: `%s'"), arg);
+       as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
        break;
  
      case OPTION_MAVXSCALAR:
@@ -8428,7 +9758,31 @@ md_parse_option (int c, char *arg)
        else if (strcasecmp (arg, "256") == 0)
         avxscalar = vex256;
        else
-       as_fatal (_("Invalid -mavxscalar= option: `%s'"), arg);
+       as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
+      break;
+
+    case OPTION_MADD_BND_PREFIX:
+      add_bnd_prefix = 1;
+      break;
+
+    case OPTION_MEVEXLIG:
+      if (strcmp (arg, "128") == 0)
+       evexlig = evexl128;
+      else if (strcmp (arg, "256") == 0)
+       evexlig = evexl256;
+      else  if (strcmp (arg, "512") == 0)
+       evexlig = evexl512;
+      else
+       as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
+      break;
+
+    case OPTION_MEVEXWIG:
+      if (strcmp (arg, "0") == 0)
+       evexwig = evexw0;
+      else if (strcmp (arg, "1") == 0)
+       evexwig = evexw1;
+      else
+       as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
        break;
  
      default:
@@ -8505,7 +9859,7 @@ show_arch (FILE *stream, int ext, int check)
           fprintf (stream, "%s\n", message);
           p = start;
           left = size - (start - message) - len - 2;
-         
+
           gas_assert (left >= 0);
  
           p = mempcpy (p, name, len);
@@ -8560,9 +9914,18 @@ md_show_usage (FILE *stream)
    -msse-check=[none|error|warning]\n\
                            check SSE instructions\n"));
    fprintf (stream, _("\
+  -moperand-check=[none|error|warning]\n\
+                          check operand combinations for validity\n"));
+  fprintf (stream, _("\
    -mavxscalar=[128|256]   encode scalar AVX instructions with specific vector\n\
                             length\n"));
    fprintf (stream, _("\
+  -mevexlig=[128|256|512] encode scalar EVEX instructions with specific vector\n\
+                           length\n"));
+  fprintf (stream, _("\
+  -mevexwig=[0|1]         encode EVEX instructions with specific EVEX.W value\n\
+                           for EVEX.W bit ignored instructions\n"));
+  fprintf (stream, _("\
    -mmnemonic=[att|intel]  use AT&T/Intel mnemonic\n"));
    fprintf (stream, _("\
    -msyntax=[att|intel]    use AT&T/Intel syntax\n"));
@@ -8572,6 +9935,8 @@ md_show_usage (FILE *stream)
    -mnaked-reg             don't require `%%' prefix for registers\n"));
    fprintf (stream, _("\
    -mold-gcc               support old (<= 2.8.1) versions of gcc\n"));
+  fprintf (stream, _("\
+  -madd-bnd-prefix        add BND prefix for all valid branches\n"));
  }
  
  #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
@@ -8594,7 +9959,7 @@ i386_target_format (void)
    else if (!strcmp (default_arch, "i386"))
      update_code_flag (CODE_32BIT, 1);
    else
-    as_fatal (_("Unknown architecture"));
+    as_fatal (_("unknown architecture"));
  
    if (cpu_flags_all_zero (&cpu_arch_isa_flags))
      cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
@@ -8637,7 +10002,7 @@ i386_target_format (void)
           case X86_64_X32_ABI:
             use_rela_relocations = 1;
             object_64bit = 1;
-           disallow_64bit_disp = 1;
+           disallow_64bit_reloc = 1;
             format = ELF_TARGET_FORMAT32;
             break;
           }
@@ -8647,13 +10012,26 @@ i386_target_format (void)
               as_fatal (_("Intel L1OM is 64bit only"));
             return ELF_TARGET_L1OM_FORMAT;
           }
+       if (cpu_arch_isa == PROCESSOR_K1OM)
+         {
+           if (x86_elf_abi != X86_64_ABI)
+             as_fatal (_("Intel K1OM is 64bit only"));
+           return ELF_TARGET_K1OM_FORMAT;
+         }
         else
           return format;
        }
  #endif
  #if defined (OBJ_MACH_O)
      case bfd_target_mach_o_flavour:
-      return flag_code == CODE_64BIT ? "mach-o-x86-64" : "mach-o-i386";
+      if (flag_code == CODE_64BIT)
+       {
+         use_rela_relocations = 1;
+         object_64bit = 1;
+         return "mach-o-x86-64";
+       }
+      else
+       return "mach-o-i386";
  #endif
      default:
        abort ();
@@ -8706,8 +10084,7 @@ i386_elf_emit_arch_note (void)
  #endif
  \f
  symbolS *
-md_undefined_symbol (name)
-     char *name;
+md_undefined_symbol (char *name)
  {
    if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
        && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
@@ -8729,9 +10106,7 @@ md_undefined_symbol (name)
  /* Round up a section size to the appropriate boundary.  */
  
  valueT
-md_section_align (segment, size)
-     segT segment ATTRIBUTE_UNUSED;
-     valueT size;
+md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
  {
  #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
    if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
@@ -8802,16 +10177,35 @@ i386_validate_fix (fixS *fixp)
  }
  
  arelent *
-tc_gen_reloc (section, fixp)
-     asection *section ATTRIBUTE_UNUSED;
-     fixS *fixp;
+tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
  {
    arelent *rel;
    bfd_reloc_code_real_type code;
  
    switch (fixp->fx_r_type)
      {
+#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
+    case BFD_RELOC_SIZE32:
+    case BFD_RELOC_SIZE64:
+      if (S_IS_DEFINED (fixp->fx_addsy)
+         && !S_IS_EXTERNAL (fixp->fx_addsy))
+       {
+         /* Resolve size relocation against local symbol to size of
+            the symbol plus addend.  */
+         valueT value = S_GET_SIZE (fixp->fx_addsy) + fixp->fx_offset;
+         if (fixp->fx_r_type == BFD_RELOC_SIZE32
+             && !fits_in_unsigned_long (value))
+           as_bad_where (fixp->fx_file, fixp->fx_line,
+                         _("symbol size computation overflow"));
+         fixp->fx_addsy = NULL;
+         fixp->fx_subsy = NULL;
+         md_apply_fix (fixp, (valueT *) &value, NULL);
+         return NULL;
+       }
+#endif
+
      case BFD_RELOC_X86_64_PLT32:
+    case BFD_RELOC_X86_64_PLT32_BND:
      case BFD_RELOC_X86_64_GOT32:
      case BFD_RELOC_X86_64_GOTPCREL:
      case BFD_RELOC_386_PLT32:
@@ -8872,7 +10266,10 @@ tc_gen_reloc (section, fixp)
               break;
             case 1: code = BFD_RELOC_8_PCREL;  break;
             case 2: code = BFD_RELOC_16_PCREL; break;
-           case 4: code = BFD_RELOC_32_PCREL; break;
+           case 4:
+             code = (fixp->fx_r_type == BFD_RELOC_X86_64_PC32_BND
+                     ? fixp-> fx_r_type : BFD_RELOC_32_PCREL);
+             break;
  #ifdef BFD64
             case 8: code = BFD_RELOC_64_PCREL; break;
  #endif
@@ -8939,12 +10336,33 @@ tc_gen_reloc (section, fixp)
    /* Use the rela in 64bit mode.  */
    else
      {
+      if (disallow_64bit_reloc)
+       switch (code)
+         {
+         case BFD_RELOC_X86_64_DTPOFF64:
+         case BFD_RELOC_X86_64_TPOFF64:
+         case BFD_RELOC_64_PCREL:
+         case BFD_RELOC_X86_64_GOTOFF64:
+         case BFD_RELOC_X86_64_GOT64:
+         case BFD_RELOC_X86_64_GOTPCREL64:
+         case BFD_RELOC_X86_64_GOTPC64:
+         case BFD_RELOC_X86_64_GOTPLT64:
+         case BFD_RELOC_X86_64_PLTOFF64:
+           as_bad_where (fixp->fx_file, fixp->fx_line,
+                         _("cannot represent relocation type %s in x32 mode"),
+                         bfd_get_reloc_code_name (code));
+           break;
+         default:
+           break;
+         }
+
        if (!fixp->fx_pcrel)
         rel->addend = fixp->fx_offset;
        else
         switch (code)
           {
           case BFD_RELOC_X86_64_PLT32:
+         case BFD_RELOC_X86_64_PLT32_BND:
           case BFD_RELOC_X86_64_GOT32:
           case BFD_RELOC_X86_64_GOTPCREL:
           case BFD_RELOC_X86_64_TLSGD:
@@ -9031,6 +10449,16 @@ tc_x86_frame_initial_instructions (void)
  }
  
  int
+x86_dwarf2_addr_size (void)
+{ /* Return an address size in bytes (going by the function name, the
+     one used for DWARF2): 4 when ELF support is compiled in and
+     x86_elf_abi is X86_64_X32_ABI, otherwise
+     bfd_arch_bits_per_address (stdoutput) divided by 8.  */
+#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
+  if (x86_elf_abi == X86_64_X32_ABI)
+    return 4;  /* Fixed 4-byte size for the x32 ABI.  */
+#endif
+  return bfd_arch_bits_per_address (stdoutput) / 8;  /* Bits to bytes.  */
+}
+
+int
  i386_elf_section_type (const char *str, size_t len)
  {
    if (flag_code == CODE_64BIT