1 /* Small compiler - Staging buffer and optimizer
5 * The staging buffer allows buffered output of generated code, deletion
6 * of redundant code, optimization by a tinkering process and reversing
7 * the ouput of evaluated expressions (which is used for the reversed
8 * evaluation of arguments in functions).
9 * Initially, stgwrite() writes to the file directly, but after a call to
10 * stgset(TRUE), output is redirected to the buffer. After a call to
11 * stgset(FALSE), stgwrite()'s output is directed to the file again. Thus
12 * only one routine is used for writing to the output, which can be
13 * buffered output or direct output.
15 * staging buffer variables: stgbuf - the buffer
16 * stgidx - current index in the staging buffer
17 * staging - if true, write to the staging buffer;
18 * if false, write to file directly.
20 * Copyright (c) ITB CompuPhase, 1997-2003
22 * This software is provided "as-is", without any express or implied warranty.
23 * In no event will the authors be held liable for any damages arising from
24 * the use of this software.
26 * Permission is granted to anyone to use this software for any purpose,
27 * including commercial applications, and to alter it and redistribute it
28 * freely, subject to the following restrictions:
30 * 1. The origin of this software must not be misrepresented; you must not
31 * claim that you wrote the original software. If you use this software in
32 * a product, an acknowledgment in the product documentation would be
33 * appreciated but is not required.
34 * 2. Altered source versions must be plainly marked as such, and must not be
35 * misrepresented as being the original software.
36 * 3. This notice may not be removed or altered from any source distribution.
42 * vim:ts=8:sw=3:sts=8:noexpandtab:cino=>5n-3f0^-2{2
51 #include <stdlib.h> /* for atoi() */
55 #include "embryo_cc_sc.h"
59 #pragma warning(disable:4125) /* decimal digit terminates octal escape sequence */
62 #include "embryo_cc_sc7.scp"
68 static void stgstring(char *start, char *end);
69 static void stgopt(char *start, char *end);
72 #define sSTG_MAX 20480
74 static char *stgbuf = NULL;
75 static int stgmax = 0; /* current size of the staging buffer */
77 #define CHECK_STGBUFFER(index) if ((int)(index)>=stgmax) grow_stgbuffer((index)+1)
80 grow_stgbuffer(int requiredsize)
83 int clear = stgbuf == NULL; /* if previously none, empty buffer explicitly */
85 assert(stgmax < requiredsize);
86 /* if the staging buffer (holding intermediate code for one line) grows
87 * over a few kBytes, there is probably a run-away expression
89 if (requiredsize > sSTG_MAX)
90 error(102, "staging buffer"); /* staging buffer overflow (fatal error) */
91 stgmax = requiredsize + sSTG_GROW;
93 p = (char *)realloc(stgbuf, stgmax * sizeof(char));
95 p = (char *)malloc(stgmax * sizeof(char));
97 error(102, "staging buffer"); /* staging buffer overflow (fatal error) */
104 stgbuffer_cleanup(void)
114 /* the variables "stgidx" and "staging" are declared in "scvars.c" */
118 * Copies a mark into the staging buffer. At this moment there are three
120 * sSTARTREORDER identifies the beginning of a series of expression
121 * strings that must be written to the output file in
123 * sENDREORDER identifies the end of 'reverse evaluation'
124 * sEXPRSTART + idx only valid within a block that is evaluated in
125 * reordered order, it identifies the start of an
126 * expression; the "idx" value is the argument position
128 * Global references: stgidx (altered)
130 * staging (referred to only)
137 CHECK_STGBUFFER(stgidx);
138 stgbuf[stgidx++] = mark;
145 if (sc_status == statWRITE)
146 return sc_writeasm(outf, str);
152 * Writes the string "st" to the staging buffer or to the output file. In the
153 * case of writing to the staging buffer, the terminating byte of zero is
154 * copied too, but... the optimizer can only work on complete lines (not on
155 * fractions of it. Therefore if the string is staged, if the last character
156 * written to the buffer is a '\0' and the previous-to-last is not a '\n',
157 * the string is concatenated to the last string in the buffer (the '\0' is
158 * overwritten). This also means an '\n' used in the middle of a string isn't
159 * recognized and could give wrong results with the optimizer.
160 * Even when writing to the output file directly, all strings are buffered
161 * until a whole line is complete.
163 * Global references: stgidx (altered)
165 * staging (referred to only)
175 if (stgidx >= 2 && stgbuf[stgidx - 1] == '\0'
176 && stgbuf[stgidx - 2] != '\n')
177 stgidx -= 1; /* overwrite last '\0' */
179 { /* copy to staging buffer */
180 CHECK_STGBUFFER(stgidx);
181 stgbuf[stgidx++] = *st++;
183 CHECK_STGBUFFER(stgidx);
184 stgbuf[stgidx++] = '\0';
188 CHECK_STGBUFFER(strlen(stgbuf) + strlen(st) + 1);
190 len = strlen(stgbuf);
191 if (len > 0 && stgbuf[len - 1] == '\n')
201 * Writes the staging buffer to the output file via stgstring() (for
202 * reversing expressions in the buffer) and stgopt() (for optimizing). It
205 * Global references: stgidx (altered)
206 * stgbuf (referred to only)
207 * staging (referred to only)
214 stgstring(&stgbuf[index], &stgbuf[stgidx]);
225 * Analyses whether code strings should be output to the file as they appear
226 * in the staging buffer or whether portions of it should be re-ordered.
227 * Re-ordering takes place in function argument lists; Small passes arguments
228 * to functions from right to left. When arguments are "named" rather than
229 * positional, the order in the source stream is indeterminate.
230 * This function calls itself recursively in case it needs to re-order code
231 * strings, and it uses a private stack (or list) to mark the start and the
232 * end of expressions in their correct (reversed) order.
233 * In any case, stgstring() sends a block as large as possible to the
234 * optimizer stgopt().
236 * In "reorder" mode, each set of code strings must start with the token
237 * sEXPRSTART, even the first. If the token sSTARTREORDER is represented
238 * by '[', sENDREORDER by ']' and sEXPRSTART by '|' the following applies:
239 * '[]...' valid, but useless; no output
240 * '[|...] valid, but useless; only one string
241 * '[|...|...] valid and usefull
242 * '[...|...] invalid, first string doesn't start with '|'
246 stgstring(char *start, char *end)
254 if (*start == sSTARTREORDER)
256 start += 1; /* skip token */
257 /* allocate a argstack with sMAXARGS items */
258 stack = (argstack *) malloc(sMAXARGS * sizeof(argstack));
260 error(103); /* insufficient memory */
261 nest = 1; /* nesting counter */
262 argc = 0; /* argument counter */
263 arg = -1; /* argument index; no valid argument yet */
277 if ((*start & sEXPRSTART) == sEXPRSTART)
282 stack[arg].end = start - 1; /* finish previous argument */
283 arg = (unsigned char)*start - sEXPRSTART;
284 stack[arg].start = start + 1;
292 start += strlen(start) + 1;
296 while (nest); /* enddo */
298 stack[arg].end = start - 1; /* finish previous argument */
302 stgstring(stack[argc].start, stack[argc].end);
309 while (ptr < end && *ptr != sSTARTREORDER)
310 ptr += strlen(ptr) + 1;
319 * Scraps code from the staging buffer by resetting "stgidx" to "index".
321 * Global references: stgidx (altered)
322 * staging (reffered to only)
325 stgdel(int index, cell code_index)
330 code_idx = code_index;
335 stgget(int *index, cell * code_index)
340 *code_index = code_idx;
347 * Sets staging on or off. If it's turned off, the staging buffer must be
348 * initialized to an empty string. If it's turned on, the routine makes sure
349 * the index ("stgidx") is set to 0 (it should already be 0).
351 * Global references: staging (altered)
353 * stgbuf (contents altered)
363 CHECK_STGBUFFER(stgidx);
364 /* write any contents that may be put in the buffer by stgwrite()
365 * when "staging" was 0
367 if (stgbuf[0] != '\0')
374 * Initialize all sequence strings of the peehole optimizer. The strings
375 * are embedded in the .EXE file in compressed format, here we expand
376 * them (and allocate memory for the sequences).
378 static SEQUENCE *sequences;
386 /* count number of sequences */
387 for (number = 0; sequences_cmp[number].find != NULL; number++)
389 number++; /* include an item for the NULL terminator */
391 if ((sequences = (SEQUENCE *) malloc(number * sizeof(SEQUENCE))) == NULL)
394 /* pre-initialize all to NULL (in case of failure) */
395 for (i = 0; i < number; i++)
397 sequences[i].find = NULL;
398 sequences[i].replace = NULL;
399 sequences[i].savesize = 0;
402 /* expand all strings */
403 for (i = 0; i < number - 1; i++)
406 strexpand(str, (unsigned char *)sequences_cmp[i].find, sizeof str,
408 assert(len <= sizeof str);
409 assert(len == (int)strlen(str) + 1);
410 sequences[i].find = (char *)malloc(len);
411 if (sequences[i].find != NULL)
412 strcpy(sequences[i].find, str);
414 strexpand(str, (unsigned char *)sequences_cmp[i].replace, sizeof str,
416 assert(len <= sizeof str);
417 assert(len == (int)strlen(str) + 1);
418 sequences[i].replace = (char *)malloc(len);
419 if (sequences[i].replace != NULL)
420 strcpy(sequences[i].replace, str);
421 sequences[i].savesize = sequences_cmp[i].savesize;
422 if (sequences[i].find == NULL || sequences[i].replace == NULL)
423 return phopt_cleanup();
434 if (sequences != NULL)
437 while (sequences[i].find != NULL || sequences[i].replace != NULL)
439 if (sequences[i].find != NULL)
440 free(sequences[i].find);
441 if (sequences[i].replace != NULL)
442 free(sequences[i].replace);
451 #define _maxoptvars 4
452 #define _aliasmax 10 /* a 32-bit number can be represented in
453 * 9 decimal digits */
456 matchsequence(char *start, char *end, char *pattern,
457 char symbols[_maxoptvars][_aliasmax + 1], int *match_length)
460 char str[_aliasmax + 1];
461 char *start_org = start;
464 for (var = 0; var < _maxoptvars; var++)
465 symbols[var][0] = '\0';
467 while (*start == '\t' || *start == ' ')
475 case '%': /* new "symbol" */
477 assert(isdigit(*pattern));
478 var = atoi(pattern) - 1;
479 assert(var >= 0 && var < _maxoptvars);
480 assert(alphanum(*start));
481 for (i = 0; start < end && alphanum(*start); i++, start++)
483 assert(i <= _aliasmax);
487 if (symbols[var][0] != '\0')
489 if (strcmp(symbols[var], str) != 0)
490 return FALSE; /* symbols should be identical */
494 strcpy(symbols[var], str);
498 if (*start != '\t' && *start != ' ')
500 while ((start < end && *start == '\t') || *start == ' ')
504 while ((start < end && *start == '\t') || *start == ' ')
505 start++; /* skip trailing white space */
508 assert(*(start + 1) == '\0');
509 start += 2; /* skip '\n' and '\0' */
510 if (*(pattern + 1) != '\0')
511 while ((start < end && *start == '\t') || *start == ' ')
512 start++; /* skip leading white space of next instruction */
515 if (tolower(*start) != tolower(*pattern))
522 *match_length = (int)(start - start_org);
527 replacesequence(char *pattern, char symbols[_maxoptvars][_aliasmax + 1],
534 /* calculate the length of the new buffer
535 * this is the length of the pattern plus the length of all symbols (note
536 * that the same symbol may occur multiple times in the pattern) plus
537 * line endings and startings ('\t' to start a line and '\n\0' to end one)
539 assert(repl_length != NULL);
547 lptr++; /* skip '%' */
548 assert(isdigit(*lptr));
549 var = atoi(lptr) - 1;
550 assert(var >= 0 && var < _maxoptvars);
551 assert(symbols[var][0] != '\0'); /* variable should be defined */
552 *repl_length += strlen(symbols[var]);
555 *repl_length += 3; /* '\t', '\n' & '\0' */
563 /* allocate a buffer to replace the sequence in */
564 if ((buffer = malloc(*repl_length)) == NULL)
570 /* replace the pattern into this temporary buffer */
572 *lptr++ = '\t'; /* the "replace" patterns do not have tabs */
575 assert((int)(lptr - buffer) < *repl_length);
579 /* write out the symbol */
581 assert(isdigit(*pattern));
582 var = atoi(pattern) - 1;
583 assert(var >= 0 && var < _maxoptvars);
584 assert(symbols[var][0] != '\0'); /* variable should be defined */
585 strcpy(lptr, symbols[var]);
586 lptr += strlen(symbols[var]);
589 /* finish the line, optionally start the next line with an indent */
592 if (*(pattern + 1) != '\0')
601 assert((int)(lptr - buffer) == *repl_length);
606 strreplace(char *dest, char *replace, int sub_length, int repl_length,
609 int offset = sub_length - repl_length;
611 if (offset > 0) /* delete a section */
612 memmove(dest, dest + offset, dest_length - offset);
613 else if (offset < 0) /* insert a section */
614 memmove(dest - offset, dest, dest_length);
615 memcpy(dest, replace, repl_length);
620 * Optimizes the staging buffer by checking for series of instructions that
621 * can be coded more compact. The routine expects the lines in the staging
622 * buffer to be separated with '\n' and '\0' characters.
624 * The longest sequences must be checked first.
628 stgopt(char *start, char *end)
630 char symbols[_maxoptvars][_aliasmax + 1];
631 int seq, match_length, repl_length;
633 assert(sequences != NULL);
636 if ((sc_debug & sNOOPTIMIZE) != 0 || sc_status != statWRITE)
638 /* do not match anything if debug-level is maximum */
644 while (sequences[seq].find != NULL)
648 (start, end, sequences[seq].find, symbols, &match_length))
651 replacesequence(sequences[seq].replace, symbols,
653 /* If the replacement is bigger than the original section, we may need
654 * to "grow" the staging buffer. This is quite complex, due to the
655 * re-ordering of expressions that can also happen in the staging
656 * buffer. In addition, it should not happen: the peephole optimizer
657 * must replace sequences with *shorter* sequences, not longer ones.
658 * So, I simply forbid sequences that are longer than the ones they
659 * are meant to replace.
661 assert(match_length >= repl_length);
662 if (match_length >= repl_length)
664 strreplace(start, replace, match_length,
665 repl_length, (int)(end - start));
666 end -= match_length - repl_length;
668 code_idx -= sequences[seq].savesize;
669 seq = 0; /* restart search for matches */
673 /* actually, we should never get here (match_length<repl_length) */
683 assert(sequences[seq].find == NULL);
687 start += strlen(start) + 1; /* to next string */
688 } /* while (start<end) */