x86: don't open code is_any_vex_encoding()

[external/binutils.git] / gas / config / tc-i386.c
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c

index 1b1b0a9..8263b15 100644 (file)
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -33,6 +33,17 @@
  #include "elf/x86-64.h"
  #include "opcodes/i386-init.h"
  
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#else
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifndef INT_MAX
+#define INT_MAX ((int) (((unsigned) (-1)) >> 1))
+#endif
+#endif
+
  #ifndef REGISTER_WARNINGS
  #define REGISTER_WARNINGS 1
  #endif
@@ -1069,6 +1080,12 @@ static const arch_entry cpu_arch[] =
      CPU_MOVDIRI_FLAGS, 0 },
    { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
      CPU_MOVDIR64B_FLAGS, 0 },
+  { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
+    CPU_AVX512_BF16_FLAGS, 0 },
+  { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
+    CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
+  { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
+    CPU_ENQCMD_FLAGS, 0 },
  };
  
  static const noarch_entry cpu_noarch[] =
@@ -1108,6 +1125,9 @@ static const noarch_entry cpu_noarch[] =
    { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
    { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
    { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
+  { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
+  { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
+  { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
  };
  
  #ifdef I386COFF
@@ -1285,7 +1305,16 @@ i386_output_nops (char *where, const unsigned char *const *patt,
    /* Place the longer NOP first.  */
    int last;
    int offset;
-  const unsigned char *nops =  patt[max_single_nop_size - 1];
+  const unsigned char *nops;
+
+  if (max_single_nop_size < 1)
+    {
+      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
+               max_single_nop_size);
+      return;
+    }
+
+  nops = patt[max_single_nop_size - 1];
  
    /* Use the smaller one if the requsted one isn't available.  */
    if (nops == NULL)
@@ -3919,7 +3948,10 @@ optimize_encoding (void)
                                 && i.tm.extension_opcode == 0x4)
                             || ((i.tm.base_opcode == 0xf6
                                  || i.tm.base_opcode == 0xc6)
-                               && i.tm.extension_opcode == 0x0)))))
+                               && i.tm.extension_opcode == 0x0)))
+                   || (fits_in_imm7 (i.op[0].imms->X_add_number)
+                       && i.tm.base_opcode == 0x83
+                       && i.tm.extension_opcode == 0x4)))
                || (i.types[0].bitfield.qword
                    && ((i.reg_operands == 2
                         && i.op[0].regs == i.op[1].regs
@@ -3933,6 +3965,7 @@ optimize_encoding (void)
      {
        /* Optimize: -O:
            andq $imm31, %r64   -> andl $imm31, %r32
+          andq $imm7, %r64    -> andl $imm7, %r32
            testq $imm31, %r64  -> testl $imm31, %r32
            xorq %r64, %r64     -> xorl %r32, %r32
            subq %r64, %r64     -> subl %r32, %r32
@@ -3966,8 +3999,7 @@ optimize_encoding (void)
             }
         }
      }
-  else if (optimize > 1
-          && i.reg_operands == 3
+  else if (i.reg_operands == 3
            && i.op[0].regs == i.op[1].regs
            && !i.types[2].bitfield.xmmword
            && (i.tm.opcode_modifier.vex
@@ -3975,10 +4007,10 @@ optimize_encoding (void)
                    && !i.rounding
                    && is_evex_encoding (&i.tm)
                    && (i.vec_encoding != vex_encoding_evex
+                      || cpu_arch_isa_flags.bitfield.cpuavx512vl
                        || i.tm.cpu_flags.bitfield.cpuavx512vl
                        || (i.tm.operand_types[2].bitfield.zmmword
-                          && i.types[2].bitfield.ymmword)
-                      || cpu_arch_isa_flags.bitfield.cpuavx512vl)))
+                          && i.types[2].bitfield.ymmword))))
            && ((i.tm.base_opcode == 0x55
                 || i.tm.base_opcode == 0x6655
                 || i.tm.base_opcode == 0x66df
@@ -3995,15 +4027,15 @@ optimize_encoding (void)
                 || i.tm.base_opcode == 0x6647)
                && i.tm.extension_opcode == None))
      {
-      /* Optimize: -O2:
+      /* Optimize: -O1:
            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
            vpsubq and vpsubw:
              EVEX VOP %zmmM, %zmmM, %zmmN
                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
              EVEX VOP %ymmM, %ymmM, %ymmN
                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
              VEX VOP %ymmM, %ymmM, %ymmN
                -> VEX VOP %xmmM, %xmmM, %xmmN
            VOP, one of vpandn and vpxor:
@@ -4012,17 +4044,17 @@ optimize_encoding (void)
            VOP, one of vpandnd and vpandnq:
              EVEX VOP %zmmM, %zmmM, %zmmN
                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
              EVEX VOP %ymmM, %ymmM, %ymmN
                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
            VOP, one of vpxord and vpxorq:
              EVEX VOP %zmmM, %zmmM, %zmmN
                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
              EVEX VOP %ymmM, %ymmM, %ymmN
                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
-              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
+              -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
            VOP, one of kxord and kxorq:
              VEX VOP %kM, %kM, %kN
                -> VEX kxorw %kM, %kM, %kN
@@ -4032,14 +4064,16 @@ optimize_encoding (void)
         */
        if (is_evex_encoding (&i.tm))
         {
-         if (i.vec_encoding == vex_encoding_evex)
-           i.tm.opcode_modifier.evex = EVEX128;
-         else
+         if (i.vec_encoding != vex_encoding_evex)
             {
               i.tm.opcode_modifier.vex = VEX128;
               i.tm.opcode_modifier.vexw = VEXW0;
               i.tm.opcode_modifier.evex = 0;
             }
+         else if (optimize > 1)
+           i.tm.opcode_modifier.evex = EVEX128;
+         else
+           return;
         }
        else if (i.tm.operand_types[0].bitfield.regmask)
         {
@@ -4056,6 +4090,73 @@ optimize_encoding (void)
             i.types[j].bitfield.ymmword = 0;
           }
      }
+  else if (i.vec_encoding != vex_encoding_evex
+          && !i.types[0].bitfield.zmmword
+          && !i.types[1].bitfield.zmmword
+          && !i.mask
+          && is_evex_encoding (&i.tm)
+          && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
+              || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
+              || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+          && i.tm.extension_opcode == None)
+    {
+      /* Optimize: -O1:
+          VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
+          vmovdqu32 and vmovdqu64:
+            EVEX VOP %xmmM, %xmmN
+              -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
+            EVEX VOP %ymmM, %ymmN
+              -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
+            EVEX VOP %xmmM, mem
+              -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
+            EVEX VOP %ymmM, mem
+              -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
+            EVEX VOP mem, %xmmN
+              -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
+            EVEX VOP mem, %ymmN
+              -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
+       */
+      for (j = 0; j < 2; j++)
+       if (operand_type_check (i.types[j], disp)
+           && i.op[j].disps->X_op == O_constant)
+         {
+           /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
+              has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
+              bytes, we choose EVEX Disp8 over VEX Disp32.  */
+           int evex_disp8, vex_disp8;
+           unsigned int memshift = i.memshift;
+           offsetT n = i.op[j].disps->X_add_number;
+
+           evex_disp8 = fits_in_disp8 (n);
+           i.memshift = 0;
+           vex_disp8 = fits_in_disp8 (n);
+           if (evex_disp8 != vex_disp8)
+             {
+               i.memshift = memshift;
+               return;
+             }
+
+           i.types[j].bitfield.disp8 = vex_disp8;
+           break;
+         }
+      if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
+       i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
+      i.tm.opcode_modifier.vex
+       = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
+      i.tm.opcode_modifier.vexw = VEXW0;
+      i.tm.opcode_modifier.evex = 0;
+      i.tm.opcode_modifier.masking = 0;
+      i.tm.opcode_modifier.disp8memshift = 0;
+      i.memshift = 0;
+      for (j = 0; j < 2; j++)
+       if (operand_type_check (i.types[j], disp)
+           && i.op[j].disps->X_op == O_constant)
+         {
+           i.types[j].bitfield.disp8
+             = fits_in_disp8 (i.op[j].disps->X_add_number);
+           break;
+         }
+    }
  }
  
  /* This is the guts of the machine-dependent assembler.  LINE points to a
@@ -6160,7 +6261,19 @@ process_suffix (void)
            /* exclude fldenv/frstor/fsave/fstenv */
            && i.tm.opcode_modifier.no_ssuf)
      {
-      i.suffix = stackop_size;
+      if (stackop_size == LONG_MNEM_SUFFIX
+         && i.tm.base_opcode == 0xcf)
+       {
+         /* stackop_size is set to LONG_MNEM_SUFFIX for the
+            .code16gcc directive to support 16-bit mode with
+            32-bit address.  For IRET without a suffix, generate
+            16-bit IRET (opcode 0xcf) to return from an interrupt
+            handler.  */
+         i.suffix = WORD_MNEM_SUFFIX;
+         as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
+       }
+      else
+       i.suffix = stackop_size;
      }
    else if (intel_syntax
            && !i.suffix
@@ -6273,9 +6386,7 @@ process_suffix (void)
        else if (i.suffix != QWORD_MNEM_SUFFIX
                && !i.tm.opcode_modifier.ignoresize
                && !i.tm.opcode_modifier.floatmf
-              && !i.tm.opcode_modifier.vex
-              && !i.tm.opcode_modifier.vexopcode
-              && !is_evex_encoding (&i.tm)
+              && !is_any_vex_encoding (&i.tm)
                && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
                    || (flag_code == CODE_64BIT
                        && i.tm.opcode_modifier.jumpbyte)))
@@ -7705,6 +7816,12 @@ need_plt32_p (symbolS *s)
    if (!IS_ELF)
      return FALSE;
  
+#ifdef TE_SOLARIS
+  /* Don't emit PLT32 relocation on Solaris: neither native linker nor
+     krtld support it.  */
+  return FALSE;
+#endif
+
    /* Since there is no need to prepare for PLT branch on x86-64, we
       can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
       be used as a marker for 32-bit PC-relative branches.  */
@@ -8062,6 +8179,8 @@ output_insn (void)
         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
        if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
+      if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
+       x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;
  
        if (i.tm.cpu_flags.bitfield.cpu8087
           || i.tm.cpu_flags.bitfield.cpu287
@@ -11342,7 +11461,7 @@ md_parse_option (int c, const char *arg)
         {
           optimize_for_space = 1;
           /* Turn on all encoding optimizations.  */
-         optimize = -1;
+         optimize = INT_MAX;
         }
        else
         {