From 2581c869b1ceabdc243f88b9b72d81554bc6a35a Mon Sep 17 00:00:00 2001
From: Charles Crayne <chuck@thor.crayne.org>
Date: Wed, 10 Sep 2008 19:21:52 -0700
Subject: [PATCH] Decouple forward references from optimization

Users who wish to control the level of optimization can
continue to specify -O0, -O1, or -Ox,
where x can be the letter itself, or any number > 1.

However, even with optimization turned off,
NASM will always make enough passes to resolve
forward references. As a result, INCBIN is now the only
item left in the critical expressions list, although TIMES
still has its own constant value check.
---
 assemble.c      | 11 +++----
 doc/nasmdoc.src | 89 +++++++++++----------------------------------------------
 nasm.c          | 11 ++++---
 parser.c        | 14 ++-------
 4 files changed, 29 insertions(+), 96 deletions(-)

diff --git a/assemble.c b/assemble.c
index 70228ac..16c8861 100644
--- a/assemble.c
+++ b/assemble.c
@@ -249,13 +249,10 @@ static int jmp_match(int32_t segment, int64_t offset, int bits,
 
     if (c != 0370 && c != 0371)
         return 0;
-    if (ins->oprs[0].opflags & OPFLAG_FORWARD) {
-        if ((optimizing < 0 || (ins->oprs[0].type & STRICT))
-            && c == 0370)
-            return 1;
-        else
-            return (pass0 == 0);        /* match a forward reference */
-    }
+    if ((optimizing <= 0 || (ins->oprs[0].type & STRICT)))
+         {
+            return 0;
+         }
     isize = calcsize(segment, offset, bits, ins, code);
     if (ins->oprs[0].segment != segment)
         return 0;
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index 6685606..068d5d8 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -783,33 +783,26 @@ with a \i{stub preprocessor} which does nothing.
 
 \S{opt-On} The \i\c{-On} Option: Specifying \i{Multipass Optimization}.
 
-NASM defaults to being a two pass assembler. This means that if you
-have a complex source file which needs more than 2 passes to assemble
-optimally, you have to enable extra passes.
-
-Using the \c{-O} option, you can tell NASM to carry out multiple passes.
+NASM defaults to not optimizing operands which can fit into a signed byte.
+This means that if you want the shortest possible object code,
+you have to enable optimization.
+ 
+Using the \c{-O} option, you can tell NASM to carry out different levels of optimization.
 The syntax is:
 
-\b \c{-O0} strict two-pass assembly, JMP and Jcc are handled more
-        like v0.98, except that backward JMPs are short, if possible.
-        Immediate operands take their long forms if a short form is
-        not specified.
+\b \c{-O0} No optimization. All operands take their long forms,
+        if a short form is not specified.
 
-\b \c{-O1} strict two-pass assembly, but forward branches are assembled
-        with code guaranteed to reach; may produce larger code than
-        -O0, but will produce successful assembly more often if
-        branch offset sizes are not specified.
-        Additionally, immediate operands which will fit in a signed byte
-        are optimized, unless the long form is specified.
+\b \c{-O1} Minimal optimization. As above, but immediate operands
+        which will fit in a signed byte are optimized,
+        unless the long form is specified.
 
-\b \c{-On} multi-pass optimization, minimize branch offsets; also will
+\b \c{-Ox} (where \c{x} is the actual letter \c{x}) Multi-pass optimization,
+        minimize branch offsets; also will
         minimize signed immediate bytes, overriding size specification
         unless the \c{strict} keyword has been used (see \k{strict}).
-        The number specifies the maximum number of passes.  The more
-	passes, the better the code, but the slower is the assembly.
-
-\b \c{-Ox} where \c{x} is the actual letter \c{x}, indicates to NASM
-   	to do unlimited passes.
+        For compatibility with earlier releases, the letter \c{x} may also be any
+        number greater than one. This number has no effect on the actual number of passes.
 
 Note that this is a capital \c{O}, and is different from a small \c{o}, which
 is used to specify the output file name. See \k{opt-o}.
@@ -1275,9 +1268,7 @@ redefined later. This is not a \i{preprocessor} definition either:
 the value of \c{msglen} is evaluated \e{once}, using the value of
 \c{$} (see \k{expr} for an explanation of \c{$}) at the point of
 definition, rather than being evaluated wherever it is referenced
-and using the value of \c{$} at the point of reference. Note that
-the operand to an \c{EQU} is also a \i{critical expression}
-(\k{crit}).
+and using the value of \c{$} at the point of reference.
 
 
 \S{times} \i\c{TIMES}: \i{Repeating} Instructions or Data
@@ -1306,8 +1297,7 @@ Note that there is no effective difference between \c{times 100 resb
 1} and \c{resb 100}, except that the latter will be assembled about
 100 times faster due to the internal structure of the assembler.
 
-The operand to \c{TIMES}, like that of \c{EQU} and those of \c{RESB}
-and friends, is a critical expression (\k{crit}).
+The operand to \c{TIMES} is a critical expression (\k{crit}).
 
 Note also that \c{TIMES} can't be applied to \i{macros}: the reason
 for this is that \c{TIMES} is processed after the macro phase, which
@@ -1813,52 +1803,7 @@ NASM rejects these examples by means of a concept called a
 \e{critical expression}, which is defined to be an expression whose
 value is required to be computable in the first pass, and which must
 therefore depend only on symbols defined before it. The argument to
-the \c{TIMES} prefix is a critical expression; for the same reason,
-the arguments to the \i\c{RESB} family of pseudo-instructions are
-also critical expressions.
-
-Critical expressions can crop up in other contexts as well: consider
-the following code.
-
-\c                 mov     ax,symbol1
-\c symbol1         equ     symbol2
-\c symbol2:
-
-On the first pass, NASM cannot determine the value of \c{symbol1},
-because \c{symbol1} is defined to be equal to \c{symbol2} which NASM
-hasn't seen yet. On the second pass, therefore, when it encounters
-the line \c{mov ax,symbol1}, it is unable to generate the code for
-it because it still doesn't know the value of \c{symbol1}. On the
-next line, it would see the \i\c{EQU} again and be able to determine
-the value of \c{symbol1}, but by then it would be too late.
-
-NASM avoids this problem by defining the right-hand side of an
-\c{EQU} statement to be a critical expression, so the definition of
-\c{symbol1} would be rejected in the first pass.
-
-There is a related issue involving \i{forward references}: consider
-this code fragment.
-
-\c         mov     eax,[ebx+offset]
-\c offset  equ     10
-
-NASM, on pass one, must calculate the size of the instruction \c{mov
-eax,[ebx+offset]} without knowing the value of \c{offset}. It has no
-way of knowing that \c{offset} is small enough to fit into a
-one-byte offset field and that it could therefore get away with
-generating a shorter form of the \i{effective-address} encoding; for
-all it knows, in pass one, \c{offset} could be a symbol in the code
-segment, and it might need the full four-byte form. So it is forced
-to compute the size of the instruction to accommodate a four-byte
-address part. In pass two, having made this decision, it is now
-forced to honour it and keep the instruction large, so the code
-generated in this case is not as small as it could have been. This
-problem can be solved by defining \c{offset} before using it, or by
-forcing byte size in the effective address by coding \c{[byte
-ebx+offset]}.
-
-Note that use of the \c{-On} switch (with n>=2) makes some of the above
-no longer true (see \k{opt-On}).
+the \c{TIMES} prefix is a critical expression.
 
 \H{locallab} \i{Local Labels}
 
diff --git a/nasm.c b/nasm.c
index 6208a72..198d4bb 100644
--- a/nasm.c
+++ b/nasm.c
@@ -294,7 +294,7 @@ int main(int argc, char **argv)
 
     time(&official_compile_time);
 
-    pass0 = 1;
+    pass0 = 0;
     want_usage = terminate_after_phase = false;
     report_error = report_error_gnu;
 
@@ -1166,8 +1166,7 @@ static void assemble_file(char *fname, StrList **depend_ptr)
         report_error(ERR_FATAL, "command line: "
                      "32-bit segment size requires a higher cpu");
 
-    pass_max = (optimizing > 0 ? optimizing : 0) + 2;   /* passes 1, optimizing, then 2 */
-    pass0 = !(optimizing > 0);  /* start at 1 if not optimizing */
+    pass_max = (INT_MAX >> 1) + 2; /* Almost unlimited */
     for (passn = 1; pass0 <= 2; passn++) {
         int pass1, pass2;
         ldfunc def_label;
@@ -1500,7 +1499,7 @@ static void assemble_file(char *fname, StrList **depend_ptr)
                 parse_line(pass1, line, &output_ins,
                            report_error, evaluate, def_label);
 
-                if (!(optimizing > 0) && pass0 == 2) {
+                if (optimizing > 0) {
                     if (forwref != NULL && globallineno == forwref->lineno) {
                         output_ins.forw_ref = true;
                         do {
@@ -1513,7 +1512,7 @@ static void assemble_file(char *fname, StrList **depend_ptr)
                         output_ins.forw_ref = false;
                 }
 
-                if (!(optimizing > 0) && output_ins.forw_ref) {
+                if (optimizing > 0) {
                     if (passn == 1) {
                         for (i = 0; i < output_ins.operands; i++) {
                             if (output_ins.oprs[i].
@@ -1768,7 +1767,7 @@ static void assemble_file(char *fname, StrList **depend_ptr)
     preproc->cleanup(0);
     nasmlist.cleanup();
 #if 1
-    if (optimizing > 0 && opt_verbose_info)     /*  -On and -Ov switches */
+    if (opt_verbose_info)     /*  -On and -Ov switches */
         fprintf(stdout,
                 "info:: assembly required 1+%d+1 passes\n", passn-3);
 #endif
diff --git a/parser.c b/parser.c
index a88e883..7e5c9a3 100644
--- a/parser.c
+++ b/parser.c
@@ -308,21 +308,13 @@ restart_parse:
     result->condition = tokval.t_inttwo;
 
     /*
-     * RESB, RESW and RESD cannot be satisfied with incorrectly
+     * INCBIN cannot be satisfied with incorrectly
      * evaluated operands, since the correct values _must_ be known
      * on the first pass. Hence, even in pass one, we set the
      * `critical' flag on calling evaluate(), so that it will bomb
-     * out on undefined symbols. Nasty, but there's nothing we can
-     * do about it.
-     *
-     * For the moment, EQU has the same difficulty, so we'll
-     * include that.
+     * out on undefined symbols.
      */
-    if (result->opcode == I_RESB || result->opcode == I_RESW ||
-	result->opcode == I_RESD || result->opcode == I_RESQ ||
-	result->opcode == I_REST || result->opcode == I_RESO ||
-	result->opcode == I_RESY ||
-	result->opcode == I_INCBIN) {
+    if (result->opcode == I_INCBIN) {
         critical = (pass0 < 2 ? 1 : 2);
 
     } else
-- 
2.7.4