}
}
-static bool possible_sbyte(operand *o)
+static bool possible_sbyte(operand *o, int min_optimizing) /* common gate used by the is_sbyte and is_zero checks */
{
return o->wrt == NO_SEG && o->segment == NO_SEG &&
!(o->opflags & OPFLAG_UNKNOWN) &&
- optimizing >= 0 && !(o->type & STRICT);
+ optimizing >= min_optimizing && !(o->type & STRICT); /* each caller passes the lowest `optimizing` value it accepts */
}
/* check that opn[op] is a signed byte of size 16 or 32 */
-static bool is_sbyte16(operand *o)
+static bool is_sbyte16(operand *o, int min_optimizing) /* min_optimizing is forwarded unchanged to possible_sbyte() */
{
int16_t v;
- if (!possible_sbyte(o))
+ if (!possible_sbyte(o, min_optimizing)) /* rejected by the common gate: never reported as a signed byte */
return false;
v = o->offset;
return v >= -128 && v <= 127;
}
-static bool is_sbyte32(operand *o)
+static bool is_sbyte32(operand *o, int min_optimizing) /* same test as is_sbyte16, with the offset held in an int32_t */
{
int32_t v;
- if (!possible_sbyte(o))
+ if (!possible_sbyte(o, min_optimizing)) /* rejected by the common gate: never reported as a signed byte */
return false;
v = o->offset;
return v >= -128 && v <= 127;
}
+/*
+ * Check if o is zero when its offset is held in a 16-bit variable.
+ * Returns false whenever possible_sbyte(o, min_optimizing) rejects the
+ * operand (WRT or segment present, OPFLAG_UNKNOWN set, STRICT type, or
+ * `optimizing` below min_optimizing).  Otherwise the offset is stored
+ * in an int16_t and compared with zero.  is_zero32() is the 32-bit
+ * counterpart.
+ */
+static bool is_zero16(operand *o, int min_optimizing)
+{
+ int16_t v;
+
+ if (!possible_sbyte(o, min_optimizing))
+ return false;
+
+ v = o->offset;
+ return v == 0;
+}
+/*
+ * Check if o is zero when its offset is held in a 32-bit variable.
+ * Returns false whenever possible_sbyte(o, min_optimizing) rejects the
+ * operand; otherwise the offset is stored in an int32_t and compared
+ * with zero.
+ */
+static bool is_zero32(operand *o, int min_optimizing)
+{
+ int32_t v;
+
+ if (!possible_sbyte(o, min_optimizing))
+ return false;
+
+ v = o->offset;
+ return v == 0;
+}
+
/* Common construct */
#define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
break;
case4(0140):
- length += is_sbyte16(opx) ? 1 : 2;
+ length += is_sbyte16(opx, 0) ? 1 : 2;
break;
case4(0144):
break;
case4(0150):
- length += is_sbyte32(opx) ? 1 : 4;
+ length += is_sbyte32(opx, 0) ? 1 : 4;
break;
case4(0154):
break;
case4(0250):
- length += is_sbyte32(opx) ? 1 : 4;
+ length += is_sbyte32(opx, 0) ? 1 : 4;
break;
case4(0254):
case4(0140):
data = opx->offset;
warn_overflow_opd(opx, 2);
- if (is_sbyte16(opx)) {
+ if (is_sbyte16(opx, 0)) {
bytes[0] = data;
out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
NO_SEG);
case4(0144):
EMIT_REX();
bytes[0] = *codes++;
- if (is_sbyte16(opx))
+ if (is_sbyte16(opx, 0))
bytes[0] |= 2; /* s-bit */
out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
offset++;
case4(0150):
data = opx->offset;
warn_overflow_opd(opx, 4);
- if (is_sbyte32(opx)) {
+ if (is_sbyte32(opx, 0)) {
bytes[0] = data;
out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
NO_SEG);
case4(0154):
EMIT_REX();
bytes[0] = *codes++;
- if (is_sbyte32(opx))
+ if (is_sbyte32(opx, 0))
bytes[0] |= 2; /* s-bit */
out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
offset++;
errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
"signed dword immediate exceeds bounds");
}
- if (is_sbyte32(opx)) {
+ if (is_sbyte32(opx, 0)) {
bytes[0] = data;
out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
NO_SEG);
static ea *process_ea(operand * input, ea * output, int bits,
int addrbits, int rfield, opflags_t rflags)
{
- bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
+ bool byte_offs = !!(input->eaflags & EAF_BYTEOFFS);
+ bool word_offs = !!(input->eaflags & EAF_WORDOFFS);
+ bool no_offs = !!(input->eaflags & EAF_NO_OFFS);
output->rip = false;
}
} else { /* it's an indirection */
int i = input->indexreg, b = input->basereg, s = input->scale;
- int32_t seg = input->segment;
int hb = input->hintbase, ht = input->hinttype;
int t, it, bt; /* register numbers */
opflags_t x, ix, bx; /* register flags */
if ((ix|bx) & (BITS32|BITS64)) {
/* it must be a 32/64-bit memory reference. Firstly we have
* to check that all registers involved are type E/Rxx. */
- int32_t sok = BITS32|BITS64, o = input->offset;
+ int32_t sok = BITS32|BITS64;
if (it != -1) {
if (!(REG64 & ~ix) || !(REG32 & ~ix))
mod = 0;
} else {
rm = (bt & 7);
- if (rm != REG_NUM_EBP && o == 0 &&
- seg == NO_SEG && !forw_ref &&
- !(input->eaflags &
- (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ if (rm != REG_NUM_EBP &&
+ (no_offs || is_zero32(input, -1)) &&
+ !(byte_offs || word_offs))
mod = 0;
- else if (input->eaflags & EAF_BYTEOFFS ||
- (o >= -128 && o <= 127 && seg == NO_SEG
- && !forw_ref
- && !(input->eaflags & EAF_WORDOFFS)))
+ else if (byte_offs ||
+ (! word_offs && is_sbyte32(input, -1)) ||
+ (rm == REG_NUM_EBP && no_offs))
mod = 1;
else
mod = 2;
mod = 0;
} else {
base = (bt & 7);
- if (base != REG_NUM_EBP && o == 0 &&
- seg == NO_SEG && !forw_ref &&
- !(input->eaflags &
- (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ if (base != REG_NUM_EBP &&
+ (no_offs || is_zero32(input, -1)) &&
+ !(byte_offs || word_offs))
mod = 0;
- else if (input->eaflags & EAF_BYTEOFFS ||
- (o >= -128 && o <= 127 && seg == NO_SEG
- && !forw_ref
- && !(input->eaflags & EAF_WORDOFFS)))
+ else if (byte_offs ||
+ (! word_offs && is_sbyte32(input, -1)) ||
+ (base == REG_NUM_EBP && no_offs))
mod = 1;
else
mod = 2;
}
} else { /* it's 16-bit */
int mod, rm;
- int16_t o = input->offset;
/* check for 64-bit long mode */
if (addrbits == 64)
if (rm == -1) /* can't happen, in theory */
return NULL; /* so panic if it does */
- if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
- !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ if (rm != 6 &&
+ (no_offs || is_zero16(input, -1)) &&
+ !(byte_offs || word_offs))
mod = 0;
- else if (input->eaflags & EAF_BYTEOFFS ||
- (o >= -128 && o <= 127 && seg == NO_SEG
- && !forw_ref
- && !(input->eaflags & EAF_WORDOFFS)))
+ else if (byte_offs ||
+ (! word_offs && is_sbyte16(input, -1)) ||
+ (rm == 6 && no_offs))
mod = 1;
else
mod = 2;
\b Macros parameters range expansion introduced. See \k{mlmacrange}.
-\b Backward compatibility on expanging of local sigle macros restored.
+\b Backward compatibility on expansion of local single-line macros restored.
\b 8 bit relocations for \c{elf} and \c{bin} output formats are introduced.
\b Tighten EA checks. We warn a user if there overflow in EA addressing.
\b Make \c{-Ox} the default optimization level. For the legacy
- behavior, specify \c{-O0} explicitly. See \k{opt-O}.
+ behavior, specify \c{-OL} explicitly. See \k{opt-O}.
+
+\b Don't optimize displacements in \c{-O0}. For the legacy
+ behavior, specify \c{-OL}. See \k{opt-O}.
\b Environment variables read with \c{%!} or tested with \c{%ifenv}
can now contain non-identifier characters if surrounded by quotes.
Changes from 0.98.07 release to 98.09b as of 28-Oct-2001
\b More closely compatible with 0.98 when -O0 is implied
-or specified. Not strictly identical, since backward
+or specified. Not strictly identical, since backward
branches in range of short offsets are recognized, and signed
byte values with no explicit size specification will be
assembled as a single byte.
\b Removed the "outforms.h" file - it appears to be
- someone's old backup of "outform.h". version "0.98.06e"
+ someone's old backup of "outform.h". version "0.98.06e"
01/09/01
\S{cl-0.98bf (bug-fixed)} Version 0.98bf (bug-fixed)
\b Fixed - elf and aoutb bug - shared libraries
- - multiple "%include" bug in "-f obj"
+ - multiple "%include" bug in "-f obj"
- jcxz, jecxz bug
- - unrecognized option bug in ndisasm
+ - unrecognized option bug in ndisasm
\S{cl-0.98.03 with John Coffman's changes released 27-Jul-2000} Version 0.98.03 with John Coffman's changes released 27-Jul-2000
extra optimization passes, "-O1" allows up to 5 extra passes,
and "-O2"(default), allows up to 10 extra optimization passes.
-\b Added a new directive: 'cpu XXX', where XXX is any of:
+\b Added a new directive: 'cpu XXX', where XXX is any of:
8086, 186, 286, 386, 486, 586, pentium, 686, PPro, P2, P3 or
Katmai. All are case insensitive. All instructions will
be selected only if they apply to the selected cpu or lower.
the "bits 16/32" directive. This is nothing new, just conforms
to a lot of other assemblers. (minor)
-\b Changed label allocation from 320/32 (10000 labels @ 200K+)
+\b Changed label allocation from 320/32 (10000 labels @ 200K+)
to 32/37 (1000 labels); makes running under DOS much easier.
Since additional label space is allocated dynamically, this
should have no effect on large programs with lots of labels.
\b Fixed a subtle preprocessor bug whereby invoking one multi-line
macro on the first line of the expansion of another, when the second
had been invoked with a label defined before it, didn't expand the
-inner macro.
+inner macro.
\b Added internal.doc back in to the distribution archives - it was
missing in 0.96 *blush*
\b Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
-
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
\c nasm -f bin driver.asm -odriver.sys
Note that this is a small o, and is different from a capital O , which
-is used to specify the number of optimisation passes required. See \k{opt-O}.
+is used to specify the optimization level. See \k{opt-O}.
\S{opt-f} The \i\c{-f} Option: Specifying the \i{Output File Format}
with a \i{stub preprocessor} which does nothing.
-\S{opt-O} The \i\c{-O} Option: Specifying \i{Multipass Optimization}
-
-NASM defaults to not optimizing operands which can fit into a signed byte.
-This means that if you want the shortest possible object code,
-you have to enable optimization.
+\S{opt-O} The \i\c{-O} Option: Specifying \i{Code Size Optimization}
Using the \c{-O} option, you can tell NASM to carry out different
-levels of optimization. The syntax is:
+levels of optimization. NASM defaults to full optimization.
+The syntax is:
\b \c{-O0}: No optimization. All operands take their long forms,
if a short form is not specified, except conditional jumps.
+
+\b \c{-OL}: Legacy optimization. Like \c{-O0}, but displacements
+ which will fit in a signed byte and do not contain references
+ to labels later in the file are optimized, unless the long form
+ is specified.
This is intended to match NASM 0.98 behavior.
-\b \c{-O1}: Minimal optimization. As above, but immediate operands
+\b \c{-O1}: Minimal optimization. Like \c{-O0}, but immediate operands
which will fit in a signed byte are optimized,
unless the long form is specified. Conditional jumps default
to the long form unless otherwise specified.
-\b \c{-Ox} (where \c{x} is the actual letter \c{x}): Multipass optimization.
- Minimize branch offsets and signed immediate bytes,
+\b \c{-Ox} (where \c{x} is the actual letter \c{x}): Full optimization.
+ Minimize branch offsets, displacements and immediates,
overriding size specification unless the \c{strict} keyword
- has been used (see \k{strict}). For compatability with earlier
+ has been used (see \k{strict}). For compatibility with earlier
releases, the letter \c{x} may also be any number greater than
one. This number has no effect on the actual number of passes.
\H{strict} \i\c{STRICT}: Inhibiting Optimization
-When assembling with the optimizer set to level 2 or higher (see
-\k{opt-O}), NASM will use size specifiers (\c{BYTE}, \c{WORD},
+When assembling with the optimizer set to full (see \k{opt-O}),
+NASM will use size specifiers (\c{BYTE}, \c{WORD},
\c{DWORD}, \c{QWORD}, \c{TWORD}, \c{OWORD} or \c{YWORD}), but will
give them the smallest possible size. The keyword \c{STRICT} can be
used to inhibit optimization and force a particular operand to be
available attributes are:
\b \c{resident} indicates that the exported name is to be kept
-resident by the system loader. This is an optimisation for
+resident by the system loader. This is an optimization for
frequently used symbols imported by name.
\b \c{nodata} indicates that the exported symbol is a function which
case '5': case '6': case '7': case '8': case '9':
opt = strtoul(param, &param, 10);
- /* -O0 -> optimizing == -1, 0.98 behaviour */
- /* -O1 -> optimizing == 0, 0.98.09 behaviour */
- if (opt < 2)
- optimizing = opt - 1;
+ if (opt == 0)
+ /* no optimization */
+ optimizing = -2;
+ else if (opt == 1)
+ /* 0.98.09 behaviour */
+ optimizing = 0;
else
optimizing = opt;
break;
+ case 'L':
+ /* 0.98 behaviour */
+ param++;
+ optimizing = -1;
+ break;
+
case 'v':
case '+':
param++;
" -F format select a debugging format\n\n"
" -I<path> adds a pathname to the include file path\n");
printf
- (" -O<digit> optimize branch offsets\n"
- " -O0: No optimization (default)\n"
+ (" -O<digit> optimize code size\n"
+ " -O0: No optimization\n"
+ " -OL: Legacy optimization\n"
" -O1: Minimal optimization\n"
- " -Ox: Multipass optimization (recommended)\n\n"
+ " -Ox: Full optimization (default)\n\n"
" -P<file> pre-includes a file\n"
" -D<macro>[=<value>] pre-defines a macro\n"
" -U<macro> undefines a macro\n"
EAF_TIMESTWO = 4, /* really do EAX*2 not EAX+EAX */
EAF_REL = 8, /* IP-relative addressing */
EAF_ABS = 16, /* non-IP-relative addressing */
- EAF_FSGS = 32 /* fs/gs segment override present */
+ EAF_FSGS = 32, /* fs/gs segment override present */
+ EAF_NO_OFFS = 64 /* no explicit offset in source */
};
enum eval_hint { /* values for `hinttype' */
}
}
} else {
+ result->oprs[operand].eaflags |= EAF_NO_OFFS;
o = 0;
result->oprs[operand].wrt = NO_SEG;
result->oprs[operand].segment = NO_SEG;
--- /dev/null
+;Testname=O0; Arguments=-O0 -fbin -ozero_displacement.bin; Files=stdout stderr zero_displacement.bin
+;Testname=OL; Arguments=-OL -fbin -ozero_displacement.bin; Files=stdout stderr zero_displacement.bin
+;Testname=O1; Arguments=-O1 -fbin -ozero_displacement.bin; Files=stdout stderr zero_displacement.bin
+;Testname=Ox; Arguments=-Ox -fbin -ozero_displacement.bin; Files=stdout stderr zero_displacement.bin
+
+bits 16
+
+mov ax, [bx]
+mov ax, [bx+0]
+
+mov ax, [bx+di]
+mov ax, [bx+di+0]
+
+mov ax, [bp]
+mov ax, [bp+0]
+
+bits 32
+
+mov eax, [eax]
+mov eax, [eax+0]
+
+mov eax, [eax+ebx]
+mov eax, [eax+ebx+0]
+
+mov eax, [ebp]
+mov eax, [ebp+0]
+
+bits 64
+
+mov eax, [rax]
+mov eax, [rax+0]
+
+mov eax, [rax+rbx]
+mov eax, [rax+rbx+0]
+
+mov eax, [rbp]
+mov eax, [rbp+0]