Improve process_ea and introduce -OL

author Victor van den Elzen <victor.vde@gmail.com>

Tue, 31 Mar 2009 02:59:44 +0000 (04:59 +0200)

committer Victor van den Elzen <victor.vde@gmail.com>

Sat, 24 Jul 2010 20:00:12 +0000 (22:00 +0200)
author Victor van den Elzen <victor.vde@gmail.com>
Tue, 31 Mar 2009 02:59:44 +0000 (04:59 +0200)
committer Victor van den Elzen <victor.vde@gmail.com>
Sat, 24 Jul 2010 20:00:12 +0000 (22:00 +0200)
diff --git a/assemble.c b/assemble.c

index 3932ab0..dbda08d 100644 (file)
--- a/assemble.c
+++ b/assemble.c
@@ -751,36 +751,59 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
      }
  }
  
-static bool possible_sbyte(operand *o)
+static bool possible_sbyte(operand *o, int min_optimizing)
  {
      return o->wrt == NO_SEG && o->segment == NO_SEG &&
         !(o->opflags & OPFLAG_UNKNOWN) &&
-       optimizing >= 0 && !(o->type & STRICT);
+       optimizing >= min_optimizing && !(o->type & STRICT);
  }
  
  /* check that opn[op]  is a signed byte of size 16 or 32 */
-static bool is_sbyte16(operand *o)
+static bool is_sbyte16(operand *o, int min_optimizing)
  {
      int16_t v;
  
-    if (!possible_sbyte(o))
+    if (!possible_sbyte(o, min_optimizing))
         return false;
  
      v = o->offset;
      return v >= -128 && v <= 127;
  }
  
-static bool is_sbyte32(operand *o)
+static bool is_sbyte32(operand *o, int min_optimizing)
  {
      int32_t v;
  
-    if (!possible_sbyte(o))
+    if (!possible_sbyte(o, min_optimizing))
         return false;
  
      v = o->offset;
      return v >= -128 && v <= 127;
  }
  
+/* Check if o is zero of size 16 or 32: true only when possible_sbyte() accepts o and its offset is 0 */
+static bool is_zero16(operand *o, int min_optimizing)
+{
+    int16_t v;
+
+    if (!possible_sbyte(o, min_optimizing))
+       return false;    /* possible_sbyte() rejected o for this min_optimizing */
+
+    v = o->offset;       /* offset is tested after conversion to int16_t */
+    return v == 0;
+}
+
+static bool is_zero32(operand *o, int min_optimizing)
+{
+    int32_t v;
+
+    if (!possible_sbyte(o, min_optimizing))
+       return false;    /* possible_sbyte() rejected o for this min_optimizing */
+
+    v = o->offset;       /* offset is tested after conversion to int32_t */
+    return v == 0;       /* 32-bit counterpart of is_zero16() */
+}
+
  /* Common construct */
  #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
  
@@ -882,7 +905,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
              break;
  
         case4(0140):
-            length += is_sbyte16(opx) ? 1 : 2;
+            length += is_sbyte16(opx, 0) ? 1 : 2;
              break;
  
         case4(0144):
@@ -891,7 +914,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
              break;
  
         case4(0150):
-            length += is_sbyte32(opx) ? 1 : 4;
+            length += is_sbyte32(opx, 0) ? 1 : 4;
              break;
  
         case4(0154):
@@ -922,7 +945,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             break;
  
         case4(0250):
-            length += is_sbyte32(opx) ? 1 : 4;
+            length += is_sbyte32(opx, 0) ? 1 : 4;
              break;
  
         case4(0254):
@@ -1418,7 +1441,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
         case4(0140):
              data = opx->offset;
              warn_overflow_opd(opx, 2);
-            if (is_sbyte16(opx)) {
+            if (is_sbyte16(opx, 0)) {
                  bytes[0] = data;
                  out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
                      NO_SEG);
@@ -1433,7 +1456,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
         case4(0144):
             EMIT_REX();
              bytes[0] = *codes++;
-            if (is_sbyte16(opx))
+            if (is_sbyte16(opx, 0))
                  bytes[0] |= 2;  /* s-bit */
              out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
              offset++;
@@ -1442,7 +1465,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
         case4(0150):
              data = opx->offset;
              warn_overflow_opd(opx, 4);
-            if (is_sbyte32(opx)) {
+            if (is_sbyte32(opx, 0)) {
                  bytes[0] = data;
                  out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
                      NO_SEG);
@@ -1457,7 +1480,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
         case4(0154):
             EMIT_REX();
              bytes[0] = *codes++;
-            if (is_sbyte32(opx))
+            if (is_sbyte32(opx, 0))
                  bytes[0] |= 2;  /* s-bit */
              out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
              offset++;
@@ -1521,7 +1544,7 @@ static void gencode(int32_t segment, int64_t offset, int bits,
                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
                         "signed dword immediate exceeds bounds");
             }
-            if (is_sbyte32(opx)) {
+            if (is_sbyte32(opx, 0)) {
                  bytes[0] = data;
                  out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
                      NO_SEG);
@@ -2206,7 +2229,9 @@ static enum match_result matches(const struct itemplate *itemp,
  static ea *process_ea(operand * input, ea * output, int bits,
                       int addrbits, int rfield, opflags_t rflags)
  {
-    bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
+    bool byte_offs = !!(input->eaflags & EAF_BYTEOFFS);
+    bool word_offs = !!(input->eaflags & EAF_WORDOFFS);
+    bool no_offs   = !!(input->eaflags & EAF_NO_OFFS);
  
      output->rip = false;
  
@@ -2267,7 +2292,6 @@ static ea *process_ea(operand * input, ea * output, int bits,
              }
          } else {                /* it's an indirection */
              int i = input->indexreg, b = input->basereg, s = input->scale;
-            int32_t seg = input->segment;
              int hb = input->hintbase, ht = input->hinttype;
              int t, it, bt;             /* register numbers */
             opflags_t x, ix, bx;        /* register flags */
@@ -2295,7 +2319,7 @@ static ea *process_ea(operand * input, ea * output, int bits,
             if ((ix|bx) & (BITS32|BITS64)) {
                  /* it must be a 32/64-bit memory reference. Firstly we have
                   * to check that all registers involved are type E/Rxx. */
-               int32_t sok = BITS32|BITS64, o = input->offset;
+               int32_t sok = BITS32|BITS64;
  
                  if (it != -1) {
                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
@@ -2365,15 +2389,13 @@ static ea *process_ea(operand * input, ea * output, int bits,
                          mod = 0;
                      } else {
                          rm = (bt & 7);
-                        if (rm != REG_NUM_EBP && o == 0 &&
-                                seg == NO_SEG && !forw_ref &&
-                                !(input->eaflags &
-                                  (EAF_BYTEOFFS | EAF_WORDOFFS)))
+                        if (rm != REG_NUM_EBP &&
+                            (no_offs || is_zero32(input, -1)) &&
+                            !(byte_offs || word_offs))
                              mod = 0;
-                        else if (input->eaflags & EAF_BYTEOFFS ||
-                                 (o >= -128 && o <= 127 && seg == NO_SEG
-                                  && !forw_ref
-                                  && !(input->eaflags & EAF_WORDOFFS)))
+                        else if (byte_offs ||
+                                 (! word_offs && is_sbyte32(input, -1)) ||
+                                 (rm == REG_NUM_EBP && no_offs))
                              mod = 1;
                          else
                              mod = 2;
@@ -2413,15 +2435,13 @@ static ea *process_ea(operand * input, ea * output, int bits,
                          mod = 0;
                      } else {
                          base = (bt & 7);
-                        if (base != REG_NUM_EBP && o == 0 &&
-                                    seg == NO_SEG && !forw_ref &&
-                                    !(input->eaflags &
-                                      (EAF_BYTEOFFS | EAF_WORDOFFS)))
+                        if (base != REG_NUM_EBP &&
+                            (no_offs || is_zero32(input, -1)) &&
+                            !(byte_offs || word_offs))
                              mod = 0;
-                        else if (input->eaflags & EAF_BYTEOFFS ||
-                                 (o >= -128 && o <= 127 && seg == NO_SEG
-                                  && !forw_ref
-                                  && !(input->eaflags & EAF_WORDOFFS)))
+                        else if (byte_offs ||
+                                 (! word_offs && is_sbyte32(input, -1)) ||
+                                 (base == REG_NUM_EBP && no_offs))
                              mod = 1;
                          else
                              mod = 2;
@@ -2434,7 +2454,6 @@ static ea *process_ea(operand * input, ea * output, int bits,
                  }
              } else {            /* it's 16-bit */
                  int mod, rm;
-                int16_t o = input->offset;
  
                  /* check for 64-bit long mode */
                  if (addrbits == 64)
@@ -2504,13 +2523,13 @@ static ea *process_ea(operand * input, ea * output, int bits,
                  if (rm == -1)   /* can't happen, in theory */
                      return NULL;        /* so panic if it does */
  
-                if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
-                    !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+                if (rm != 6 &&
+                    (no_offs || is_zero16(input, -1)) &&
+                    !(byte_offs || word_offs))
                      mod = 0;
-                else if (input->eaflags & EAF_BYTEOFFS ||
-                         (o >= -128 && o <= 127 && seg == NO_SEG
-                          && !forw_ref
-                          && !(input->eaflags & EAF_WORDOFFS)))
+                else if (byte_offs ||
+                         (! word_offs && is_sbyte16(input, -1)) ||
+                         (rm == 6 && no_offs))
                      mod = 1;
                  else
                      mod = 2;
diff --git a/doc/changes.src b/doc/changes.src

index 75c355b..b4e71e4 100644 (file)
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -28,7 +28,7 @@ since 2007.
  
  \b Macros parameters range expansion introduced. See \k{mlmacrange}.
  
-\b Backward compatibility on expanging of local sigle macros restored.
+\b Backward compatibility for the expansion of local single-line macros restored.
  
  \b 8 bit relocations for \c{elf} and \c{bin} output formats are introduced.
  
@@ -59,7 +59,10 @@ since 2007.
  \b Tighten EA checks. We warn a user if there overflow in EA addressing.
  
  \b Make \c{-Ox} the default optimization level.  For the legacy
-  behavior, specify \c{-O0} explicitly.  See \k{opt-O}.
+  behavior, specify \c{-OL} explicitly.  See \k{opt-O}.
+
+\b Don't optimize displacements in \c{-O0}.  For the legacy
+  behavior, specify \c{-OL}.  See \k{opt-O}.
  
  \b Environment variables read with \c{%!} or tested with \c{%ifenv}
    can now contain non-identifier characters if surrounded by quotes.
@@ -858,7 +861,7 @@ it 0.98.28 to not confuse poor little apt-get.
  Changes from 0.98.07 release to 98.09b as of 28-Oct-2001
  
  \b More closely compatible with 0.98 when -O0 is implied
-or specified.  Not strictly identical, since backward 
+or specified.  Not strictly identical, since backward
  branches in range of short offsets are recognized, and signed
  byte values with no explicit size specification will be
  assembled as a single byte.
@@ -912,7 +915,7 @@ from the current BITS setting (16 or 32).
  
  
  \b       Removed the "outforms.h" file - it appears to be
-        someone's old backup of "outform.h". version "0.98.06e" 
+        someone's old backup of "outform.h". version "0.98.06e"
  
  01/09/01
  
@@ -948,9 +951,9 @@ from the current BITS setting (16 or 32).
  \S{cl-0.98bf (bug-fixed)} Version 0.98bf (bug-fixed)
  
  \b Fixed - elf and aoutb bug - shared libraries
-        - multiple "%include" bug in "-f obj"   
+        - multiple "%include" bug in "-f obj"
          - jcxz, jecxz bug
-        - unrecognized option bug in ndisasm 
+        - unrecognized option bug in ndisasm
  
  \S{cl-0.98.03 with John Coffman's changes released 27-Jul-2000} Version 0.98.03 with John Coffman's changes released 27-Jul-2000
  
@@ -973,7 +976,7 @@ This feature is controlled by a new command-line switch: "O",
  extra optimization passes, "-O1" allows up to 5 extra passes,
  and "-O2"(default), allows up to 10 extra optimization passes.
  
-\b Added a new directive:  'cpu XXX', where XXX is any of: 
+\b Added a new directive:  'cpu XXX', where XXX is any of:
  8086, 186, 286, 386, 486, 586, pentium, 686, PPro, P2, P3 or
  Katmai.  All are case insensitive.  All instructions will
  be selected only if they apply to the selected cpu or lower.
@@ -983,7 +986,7 @@ Corrected a couple of bugs in cpu-dependence in 'insns.dat'.
  the "bits 16/32" directive. This is nothing new, just conforms
  to a lot of other assemblers. (minor)
  
-\b Changed label allocation from 320/32 (10000 labels @ 200K+) 
+\b Changed label allocation from 320/32 (10000 labels @ 200K+)
  to 32/37 (1000 labels); makes running under DOS much easier.
  Since additional label space is allocated dynamically, this
  should have no effect on large programs with lots of labels.
@@ -1575,7 +1578,7 @@ on lines such as `dd 0.0,0.0,0.0,0.0,...'
  \b Fixed a subtle preprocessor bug whereby invoking one multi-line
  macro on the first line of the expansion of another, when the second
  had been invoked with a label defined before it, didn't expand the
-inner macro. 
+inner macro.
  
  \b Added internal.doc back in to the distribution archives - it was
  missing in 0.96 *blush*
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src

index eb1fe5e..ffa06e6 100644 (file)
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -345,7 +345,7 @@ notice, this list of conditions and the following disclaimer.
  \b Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in the
  documentation and/or other materials provided with the distribution.
-      
+
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@@ -539,7 +539,7 @@ an intervening space. For example:
  \c nasm -f bin driver.asm -odriver.sys
  
  Note that this is a small o, and is different from a capital O , which
-is used to specify the number of optimisation passes required. See \k{opt-O}.
+is used to specify the optimization level. See \k{opt-O}.
  
  
  \S{opt-f} The \i\c{-f} Option: Specifying the \i{Output File Format}
@@ -840,28 +840,30 @@ argument, instructs NASM to replace its powerful \i{preprocessor}
  with a \i{stub preprocessor} which does nothing.
  
  
-\S{opt-O} The \i\c{-O} Option: Specifying \i{Multipass Optimization}
-
-NASM defaults to not optimizing operands which can fit into a signed byte.
-This means that if you want the shortest possible object code,
-you have to enable optimization.
+\S{opt-O} The \i\c{-O} Option: Specifying \i{Code Size Optimization}
  
  Using the \c{-O} option, you can tell NASM to carry out different
-levels of optimization.  The syntax is:
+levels of optimization. NASM defaults to full optimization.
+The syntax is:
  
  \b \c{-O0}: No optimization. All operands take their long forms,
          if a short form is not specified, except conditional jumps.
+
+\b \c{-OL}: Legacy optimization. Like \c{-O0}, but displacements
+        which will fit in a signed byte and do not contain references
+        to labels later in the file are optimized, unless the long form
+        is specified.
          This is intended to match NASM 0.98 behavior.
  
-\b \c{-O1}: Minimal optimization. As above, but immediate operands
+\b \c{-O1}: Minimal optimization. Like \c{-OL}, but immediate operands
          which will fit in a signed byte are optimized,
          unless the long form is specified.  Conditional jumps default
          to the long form unless otherwise specified.
  
-\b \c{-Ox} (where \c{x} is the actual letter \c{x}): Multipass optimization.
-        Minimize branch offsets and signed immediate bytes,
+\b \c{-Ox} (where \c{x} is the actual letter \c{x}): Full optimization.
+        Minimize branch offsets, displacements and immediates,
          overriding size specification unless the \c{strict} keyword
-        has been used (see \k{strict}).  For compatability with earlier
+        has been used (see \k{strict}). For compatibility with earlier
          releases, the letter \c{x} may also be any number greater than
          one. This number has no effect on the actual number of passes.
  
@@ -1843,8 +1845,8 @@ invent one using the macro processor.
  
  \H{strict} \i\c{STRICT}: Inhibiting Optimization
  
-When assembling with the optimizer set to level 2 or higher (see
-\k{opt-O}), NASM will use size specifiers (\c{BYTE}, \c{WORD},
+When assembling with the optimizer set to full (see \k{opt-O}),
+NASM will use size specifiers (\c{BYTE}, \c{WORD},
  \c{DWORD}, \c{QWORD}, \c{TWORD}, \c{OWORD} or \c{YWORD}), but will
  give them the smallest possible size. The keyword \c{STRICT} can be
  used to inhibit optimization and force a particular operand to be
@@ -4952,7 +4954,7 @@ be specified, even if it is the same as the internal name. The
  available attributes are:
  
  \b \c{resident} indicates that the exported name is to be kept
-resident by the system loader. This is an optimisation for
+resident by the system loader. This is an optimization for
  frequently used symbols imported by name.
  
  \b \c{nodata} indicates that the exported symbol is a function which
diff --git a/nasm.c b/nasm.c

index 7a63afb..f6a710a 100644 (file)
--- a/nasm.c
+++ b/nasm.c
@@ -674,14 +674,22 @@ static bool process_arg(char *p, char *q)
                     case '5': case '6': case '7': case '8': case '9':
                         opt = strtoul(param, &param, 10);
  
-                       /* -O0 -> optimizing == -1, 0.98 behaviour */
-                       /* -O1 -> optimizing == 0, 0.98.09 behaviour */
-                       if (opt < 2)
-                           optimizing = opt - 1;
+                        if (opt == 0)
+                            /* no optimization */
+                            optimizing = -2;
+                       else if (opt == 1)
+                            /* 0.98.09 behaviour */
+                           optimizing = 0;
                         else
                             optimizing = opt;
                         break;
  
+                    case 'L':
+                        /* 0.98 behaviour */
+                        param++;
+                        optimizing = -1;
+                        break;
+
                     case 'v':
                     case '+':
                         param++;
@@ -783,10 +791,11 @@ static bool process_arg(char *p, char *q)
                   "    -F format   select a debugging format\n\n"
                   "    -I<path>    adds a pathname to the include file path\n");
              printf
-                ("    -O<digit>   optimize branch offsets\n"
-                 "                -O0: No optimization (default)\n"
+                ("    -O<digit>   optimize code size\n"
+                 "                -O0: No optimization\n"
+                 "                -OL: Legacy optimization\n"
                   "                -O1: Minimal optimization\n"
-                 "                -Ox: Multipass optimization (recommended)\n\n"
+                 "                -Ox: Full optimization (default)\n\n"
                   "    -P<file>    pre-includes a file\n"
                   "    -D<macro>[=<value>] pre-defines a macro\n"
                   "    -U<macro>   undefines a macro\n"
diff --git a/nasm.h b/nasm.h

index 76f561f..7ff1902 100644 (file)
--- a/nasm.h
+++ b/nasm.h
@@ -480,7 +480,8 @@ enum ea_flags {                     /* special EA flags */
      EAF_TIMESTWO =  4,          /* really do EAX*2 not EAX+EAX */
      EAF_REL     =  8,          /* IP-relative addressing */
      EAF_ABS      = 16,         /* non-IP-relative addressing */
-    EAF_FSGS    = 32           /* fs/gs segment override present */
+    EAF_FSGS    = 32,          /* fs/gs segment override present */
+    EAF_NO_OFFS  = 64           /* no explicit offset in source */
  };
  
  enum eval_hint {                /* values for `hinttype' */
diff --git a/parser.c b/parser.c

index ea36e86..4585a5b 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -804,6 +804,7 @@ is_expression:
                      }
                  }
              } else {
+                result->oprs[operand].eaflags |= EAF_NO_OFFS;
                  o = 0;
                  result->oprs[operand].wrt = NO_SEG;
                  result->oprs[operand].segment = NO_SEG;
diff --git a/test/zero_displacement.asm b/test/zero_displacement.asm

new file mode 100644 (file)

index 0000000..5aa4743
--- /dev/null
+++ b/test/zero_displacement.asm
@@ -0,0 +1,37 @@
+;Testname=O0; Arguments=-O0 -fbin -ozero_displacement.bin; Files=stdout stderr zero_displacement.bin
+;Testname=OL; Arguments=-OL -fbin -ozero_displacement.bin; Files=stdout stderr zero_displacement.bin
+;Testname=O1; Arguments=-O1 -fbin -ozero_displacement.bin; Files=stdout stderr zero_displacement.bin
+;Testname=Ox; Arguments=-Ox -fbin -ozero_displacement.bin; Files=stdout stderr zero_displacement.bin
+
+bits 16                 ; each pair below: same operand without, then with, an explicit +0
+
+mov ax, [bx]
+mov ax, [bx+0]
+
+mov ax, [bx+di]
+mov ax, [bx+di+0]
+
+mov ax, [bp]
+mov ax, [bp+0]
+
+bits 32                 ; same three pairs using 32-bit registers
+
+mov eax, [eax]
+mov eax, [eax+0]
+
+mov eax, [eax+ebx]
+mov eax, [eax+ebx+0]
+
+mov eax, [ebp]
+mov eax, [ebp+0]
+
+bits 64                 ; same three pairs using 64-bit base/index registers
+
+mov eax, [rax]
+mov eax, [rax+0]
+
+mov eax, [rax+rbx]
+mov eax, [rax+rbx+0]
+
+mov eax, [rbp]
+mov eax, [rbp+0]
author	Victor van den Elzen <victor.vde@gmail.com>
	Tue, 31 Mar 2009 02:59:44 +0000 (04:59 +0200)
committer	Victor van den Elzen <victor.vde@gmail.com>
	Sat, 24 Jul 2010 20:00:12 +0000 (22:00 +0200)
assemble.c		patch \| blob \| history
doc/changes.src		patch \| blob \| history
doc/nasmdoc.src		patch \| blob \| history
nasm.c		patch \| blob \| history
nasm.h		patch \| blob \| history
parser.c		patch \| blob \| history
test/zero_displacement.asm	[new file with mode: 0644]	patch \| blob