1 /* Small compiler - Staging buffer and optimizer
5 * The staging buffer allows buffered output of generated code, deletion
6 * of redundant code, optimization by a tinkering process and reversing
7 * the ouput of evaluated expressions (which is used for the reversed
8 * evaluation of arguments in functions).
9 * Initially, stgwrite() writes to the file directly, but after a call to
10 * stgset(TRUE), output is redirected to the buffer. After a call to
11 * stgset(FALSE), stgwrite()'s output is directed to the file again. Thus
12 * only one routine is used for writing to the output, which can be
13 * buffered output or direct output.
15 * staging buffer variables: stgbuf - the buffer
16 * stgidx - current index in the staging buffer
17 * staging - if true, write to the staging buffer;
18 * if false, write to file directly.
20 * Copyright (c) ITB CompuPhase, 1997-2003
22 * This software is provided "as-is", without any express or implied warranty.
23 * In no event will the authors be held liable for any damages arising from
24 * the use of this software.
26 * Permission is granted to anyone to use this software for any purpose,
27 * including commercial applications, and to alter it and redistribute it
28 * freely, subject to the following restrictions:
30 * 1. The origin of this software must not be misrepresented; you must not
31 * claim that you wrote the original software. If you use this software in
32 * a product, an acknowledgment in the product documentation would be
33 * appreciated but is not required.
34 * 2. Altered source versions must be plainly marked as such, and must not be
35 * misrepresented as being the original software.
36 * 3. This notice may not be removed or altered from any source distribution.
48 #include <stdlib.h> /* for atoi() */
52 #include "embryo_cc_sc.h"
56 #pragma warning(disable:4125) /* decimal digit terminates octal escape sequence */
59 #include "embryo_cc_sc7.scp"
65 static void stgstring(char *start, char *end);
66 static void stgopt(char *start, char *end);
69 #define sSTG_MAX 20480
71 static char *stgbuf = NULL;
72 static int stgmax = 0; /* current size of the staging buffer */
74 #define CHECK_STGBUFFER(index) if ((int)(index)>=stgmax) grow_stgbuffer((index)+1)
77 grow_stgbuffer(int requiredsize)
80 int clear = !stgbuf; /* if previously none, empty buffer explicitly */
82 assert(stgmax < requiredsize);
83 /* if the staging buffer (holding intermediate code for one line) grows
84 * over a few kBytes, there is probably a run-away expression
86 if (requiredsize > sSTG_MAX)
87 error(102, "staging buffer"); /* staging buffer overflow (fatal error) */
88 stgmax = requiredsize + sSTG_GROW;
90 p = (char *)realloc(stgbuf, stgmax * sizeof(char));
92 p = (char *)malloc(stgmax * sizeof(char));
94 error(102, "staging buffer"); /* staging buffer overflow (fatal error) */
101 stgbuffer_cleanup(void)
111 /* the variables "stgidx" and "staging" are declared in "scvars.c" */
115 * Copies a mark into the staging buffer. At this moment there are three
117 * sSTARTREORDER identifies the beginning of a series of expression
118 * strings that must be written to the output file in
120 * sENDREORDER identifies the end of 'reverse evaluation'
121 * sEXPRSTART + idx only valid within a block that is evaluated in
122 * reordered order, it identifies the start of an
123 * expression; the "idx" value is the argument position
125 * Global references: stgidx (altered)
127 * staging (referred to only)
134 CHECK_STGBUFFER(stgidx);
135 stgbuf[stgidx++] = mark;
142 if (sc_status == statWRITE)
143 return sc_writeasm(outf, str);
149 * Writes the string "st" to the staging buffer or to the output file. In the
150 * case of writing to the staging buffer, the terminating byte of zero is
151 * copied too, but... the optimizer can only work on complete lines (not on
152 * fractions of it. Therefore if the string is staged, if the last character
153 * written to the buffer is a '\0' and the previous-to-last is not a '\n',
154 * the string is concatenated to the last string in the buffer (the '\0' is
155 * overwritten). This also means an '\n' used in the middle of a string isn't
156 * recognized and could give wrong results with the optimizer.
157 * Even when writing to the output file directly, all strings are buffered
158 * until a whole line is complete.
160 * Global references: stgidx (altered)
162 * staging (referred to only)
172 if (stgidx >= 2 && stgbuf[stgidx - 1] == '\0'
173 && stgbuf[stgidx - 2] != '\n')
174 stgidx -= 1; /* overwrite last '\0' */
176 { /* copy to staging buffer */
177 CHECK_STGBUFFER(stgidx);
178 stgbuf[stgidx++] = *st++;
180 CHECK_STGBUFFER(stgidx);
181 stgbuf[stgidx++] = '\0';
185 CHECK_STGBUFFER(strlen(stgbuf) + strlen(st) + 1);
187 len = strlen(stgbuf);
188 if (len > 0 && stgbuf[len - 1] == '\n')
198 * Writes the staging buffer to the output file via stgstring() (for
199 * reversing expressions in the buffer) and stgopt() (for optimizing). It
202 * Global references: stgidx (altered)
203 * stgbuf (referred to only)
204 * staging (referred to only)
211 stgstring(&stgbuf[index], &stgbuf[stgidx]);
222 * Analyses whether code strings should be output to the file as they appear
223 * in the staging buffer or whether portions of it should be re-ordered.
224 * Re-ordering takes place in function argument lists; Small passes arguments
225 * to functions from right to left. When arguments are "named" rather than
226 * positional, the order in the source stream is indeterminate.
227 * This function calls itself recursively in case it needs to re-order code
228 * strings, and it uses a private stack (or list) to mark the start and the
229 * end of expressions in their correct (reversed) order.
230 * In any case, stgstring() sends a block as large as possible to the
231 * optimizer stgopt().
233 * In "reorder" mode, each set of code strings must start with the token
234 * sEXPRSTART, even the first. If the token sSTARTREORDER is represented
235 * by '[', sENDREORDER by ']' and sEXPRSTART by '|' the following applies:
236 * '[]...' valid, but useless; no output
237 * '[|...] valid, but useless; only one string
238 * '[|...|...] valid and useful
239 * '[...|...] invalid, first string doesn't start with '|'
243 stgstring(char *start, char *end)
251 if (*start == sSTARTREORDER)
253 start += 1; /* skip token */
254 /* allocate a argstack with sMAXARGS items */
255 stack = (argstack *) malloc(sMAXARGS * sizeof(argstack));
257 error(103); /* insufficient memory */
258 nest = 1; /* nesting counter */
259 argc = 0; /* argument counter */
260 arg = -1; /* argument index; no valid argument yet */
274 if ((*start & sEXPRSTART) == sEXPRSTART)
279 stack[arg].end = start - 1; /* finish previous argument */
280 arg = (unsigned char)*start - sEXPRSTART;
281 stack[arg].start = start + 1;
289 start += strlen(start) + 1;
293 while (nest); /* enddo */
295 stack[arg].end = start - 1; /* finish previous argument */
299 stgstring(stack[argc].start, stack[argc].end);
306 while (ptr < end && *ptr != sSTARTREORDER)
307 ptr += strlen(ptr) + 1;
316 * Scraps code from the staging buffer by resetting "stgidx" to "index".
318 * Global references: stgidx (altered)
319 * staging (referred to only)
322 stgdel(int index, cell code_index)
327 code_idx = code_index;
332 stgget(int *index, cell * code_index)
337 *code_index = code_idx;
344 * Sets staging on or off. If it's turned off, the staging buffer must be
345 * initialized to an empty string. If it's turned on, the routine makes sure
346 * the index ("stgidx") is set to 0 (it should already be 0).
348 * Global references: staging (altered)
350 * stgbuf (contents altered)
360 CHECK_STGBUFFER(stgidx);
361 /* write any contents that may be put in the buffer by stgwrite()
362 * when "staging" was 0
364 if (stgbuf[0] != '\0')
371 * Initialize all sequence strings of the peehole optimizer. The strings
372 * are embedded in the .EXE file in compressed format, here we expand
373 * them (and allocate memory for the sequences).
375 static SEQUENCE *sequences;
383 /* count number of sequences */
384 for (number = 0; sequences_cmp[number].find; number++)
386 number++; /* include an item for the NULL terminator */
388 if (!(sequences = (SEQUENCE *)malloc(number * sizeof(SEQUENCE))))
391 /* pre-initialize all to NULL (in case of failure) */
392 for (i = 0; i < number; i++)
394 sequences[i].find = NULL;
395 sequences[i].replace = NULL;
396 sequences[i].savesize = 0;
399 /* expand all strings */
400 for (i = 0; i < number - 1; i++)
403 strexpand(str, (unsigned char *)sequences_cmp[i].find, sizeof str,
405 assert(len <= sizeof str);
406 assert(len == (int)strlen(str) + 1);
407 sequences[i].find = (char *)malloc(len);
408 if (sequences[i].find)
409 strcpy(sequences[i].find, str);
411 strexpand(str, (unsigned char *)sequences_cmp[i].replace, sizeof str,
413 assert(len <= sizeof str);
414 assert(len == (int)strlen(str) + 1);
415 sequences[i].replace = (char *)malloc(len);
416 if (sequences[i].replace)
417 strcpy(sequences[i].replace, str);
418 sequences[i].savesize = sequences_cmp[i].savesize;
419 if (!sequences[i].find || !sequences[i].replace)
420 return phopt_cleanup();
434 while (sequences[i].find || sequences[i].replace)
436 if (sequences[i].find)
437 free(sequences[i].find);
438 if (sequences[i].replace)
439 free(sequences[i].replace);
448 #define _maxoptvars 4
449 #define _aliasmax 10 /* a 32-bit number can be represented in
450 * 9 decimal digits */
453 matchsequence(char *start, char *end, char *pattern,
454 char symbols[_maxoptvars][_aliasmax + 1], int *match_length)
457 char str[_aliasmax + 1];
458 char *start_org = start;
461 for (var = 0; var < _maxoptvars; var++)
462 symbols[var][0] = '\0';
464 while (*start == '\t' || *start == ' ')
472 case '%': /* new "symbol" */
474 assert(isdigit(*pattern));
475 var = atoi(pattern) - 1;
476 assert(var >= 0 && var < _maxoptvars);
477 assert(alphanum(*start));
478 for (i = 0; start < end && alphanum(*start); i++, start++)
480 assert(i <= _aliasmax);
484 if (symbols[var][0] != '\0')
486 if (strcmp(symbols[var], str) != 0)
487 return FALSE; /* symbols should be identical */
491 strcpy(symbols[var], str);
495 if (*start != '\t' && *start != ' ')
497 while ((start < end && *start == '\t') || *start == ' ')
501 while ((start < end && *start == '\t') || *start == ' ')
502 start++; /* skip trailing white space */
505 assert(*(start + 1) == '\0');
506 start += 2; /* skip '\n' and '\0' */
507 if (*(pattern + 1) != '\0')
508 while ((start < end && *start == '\t') || *start == ' ')
509 start++; /* skip leading white space of next instruction */
512 if (tolower(*start) != tolower(*pattern))
519 *match_length = (int)(start - start_org);
524 replacesequence(char *pattern, char symbols[_maxoptvars][_aliasmax + 1],
531 /* calculate the length of the new buffer
532 * this is the length of the pattern plus the length of all symbols (note
533 * that the same symbol may occur multiple times in the pattern) plus
534 * line endings and startings ('\t' to start a line and '\n\0' to end one)
536 assert(repl_length != NULL);
544 lptr++; /* skip '%' */
545 assert(isdigit(*lptr));
546 var = atoi(lptr) - 1;
547 assert(var >= 0 && var < _maxoptvars);
548 assert(symbols[var][0] != '\0'); /* variable should be defined */
549 *repl_length += strlen(symbols[var]);
552 *repl_length += 3; /* '\t', '\n' & '\0' */
560 /* allocate a buffer to replace the sequence in */
561 if (!(buffer = malloc(*repl_length)))
567 /* replace the pattern into this temporary buffer */
569 *lptr++ = '\t'; /* the "replace" patterns do not have tabs */
572 assert((int)(lptr - buffer) < *repl_length);
576 /* write out the symbol */
578 assert(isdigit(*pattern));
579 var = atoi(pattern) - 1;
580 assert(var >= 0 && var < _maxoptvars);
581 assert(symbols[var][0] != '\0'); /* variable should be defined */
582 strcpy(lptr, symbols[var]);
583 lptr += strlen(symbols[var]);
586 /* finish the line, optionally start the next line with an indent */
589 if (*(pattern + 1) != '\0')
598 assert((int)(lptr - buffer) == *repl_length);
603 strreplace(char *dest, char *replace, int sub_length, int repl_length,
606 int offset = sub_length - repl_length;
608 if (offset > 0) /* delete a section */
609 memmove(dest, dest + offset, dest_length - offset);
610 else if (offset < 0) /* insert a section */
611 memmove(dest - offset, dest, dest_length);
612 memcpy(dest, replace, repl_length);
617 * Optimizes the staging buffer by checking for series of instructions that
618 * can be coded more compact. The routine expects the lines in the staging
619 * buffer to be separated with '\n' and '\0' characters.
621 * The longest sequences must be checked first.
625 stgopt(char *start, char *end)
627 char symbols[_maxoptvars][_aliasmax + 1];
628 int seq, match_length, repl_length;
630 assert(sequences != NULL);
633 if ((sc_debug & sNOOPTIMIZE) != 0 || sc_status != statWRITE)
635 /* do not match anything if debug-level is maximum */
641 while (sequences[seq].find)
645 (start, end, sequences[seq].find, symbols, &match_length))
648 replacesequence(sequences[seq].replace, symbols,
650 /* If the replacement is bigger than the original section, we may need
651 * to "grow" the staging buffer. This is quite complex, due to the
652 * re-ordering of expressions that can also happen in the staging
653 * buffer. In addition, it should not happen: the peephole optimizer
654 * must replace sequences with *shorter* sequences, not longer ones.
655 * So, I simply forbid sequences that are longer than the ones they
656 * are meant to replace.
658 assert(match_length >= repl_length);
659 if (match_length >= repl_length)
661 strreplace(start, replace, match_length,
662 repl_length, (int)(end - start));
663 end -= match_length - repl_length;
665 code_idx -= sequences[seq].savesize;
666 seq = 0; /* restart search for matches */
670 /* actually, we should never get here (match_length<repl_length) */
680 assert(sequences[seq].find == NULL);
684 start += strlen(start) + 1; /* to next string */
685 } /* while (start<end) */