From 3720f7beaeaefeb1e6bbf1bb8416ef78d4abe6e6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 May 2008 11:00:50 -0700 Subject: [PATCH] Generate a byte array instead of using strings for the byte codes Generate a byte array instead of using C compiler strings for the byte codes. This has a few advantages: - No need to special-case zero due to broken C compilers. - Only insns.pl only ever reads the string, so we can invent our own syntax. - Compaction. - We can give it the proper, unsigned type. --- Makefile.in | 12 ++++-- Mkfiles/msvc.mak | 16 +++++--- Mkfiles/netware.mak | 8 ++-- Mkfiles/openwcom.mak | 16 +++++--- Mkfiles/owlinux.mak | 18 +++++---- assemble.c | 14 +++---- insns.h | 2 +- insns.pl | 103 ++++++++++++++++++++++++++++++++++++++++++++++----- 8 files changed, 145 insertions(+), 44 deletions(-) diff --git a/Makefile.in b/Makefile.in index 287340f..a0308c0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -84,6 +84,8 @@ ndisasm$(X): $(NDISASM) $(XOBJS) # though, so it isn't necessary to have Perl just to recompile NASM # from the distribution. +insnsb.c: insns.dat insns.pl + $(PERL) $(srcdir)/insns.pl -b $(srcdir)/insns.dat insnsa.c: insns.dat insns.pl $(PERL) $(srcdir)/insns.pl -a $(srcdir)/insns.dat insnsd.c: insns.dat insns.pl @@ -142,7 +144,7 @@ pptok.c: pptok.dat pptok.pl perllib/phash.ph # This target generates all files that require perl. # This allows easier generation of distribution (see dist target). -PERLREQ = macros.c insnsa.c insnsd.c insnsi.h insnsn.c \ +PERLREQ = macros.c insnsb.c insnsa.c insnsd.c insnsi.h insnsn.c \ regs.c regs.h regflags.c regdis.c regvals.c tokhash.c tokens.h \ version.h version.mac pptok.h pptok.c perlreq: $(PERLREQ) @@ -238,10 +240,12 @@ float.$(O): float.c compiler.h config.h float.h insnsi.h nasm.h nasmlib.h \ regs.h version.h hashtbl.$(O): hashtbl.c compiler.h config.h hashtbl.h insnsi.h nasm.h \ nasmlib.h regs.h version.h -insnsa.$(O): insnsa.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ - regs.h tokens.h version.h -insnsd.$(O): insnsd.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ +insnsa.$(O): insnsa.c compiler.h config.h insns.h insnsb.c insnsi.h nasm.h \ + nasmlib.h regs.h tokens.h version.h +insnsb.$(O): insnsb.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ regs.h tokens.h version.h +insnsd.$(O): insnsd.c compiler.h config.h insns.h insnsb.c insnsi.h nasm.h \ + nasmlib.h regs.h tokens.h version.h insnsn.$(O): insnsn.c labels.$(O): labels.c compiler.h config.h hashtbl.h insnsi.h nasm.h \ nasmlib.h regs.h version.h diff --git a/Mkfiles/msvc.mak b/Mkfiles/msvc.mak index bf8bc01..e154634 100644 --- a/Mkfiles/msvc.mak +++ b/Mkfiles/msvc.mak @@ -19,8 +19,8 @@ CFLAGS = /O2 /Ox /Oy /W2 BUILD_CFLAGS = $(CFLAGS) /I$(srcdir)/inttypes INTERNAL_CFLAGS = /I$(srcdir) /I. /DHAVE__SNPRINTF /DHAVE__VSNPRINTF ALL_CFLAGS = $(BUILD_CFLAGS) $(INTERNAL_CFLAGS) -LDFLAGS = -LIBS = +LDFLAGS = +LIBS = PERL = perl -I$(srcdir)/perllib # Binary suffixes @@ -58,6 +58,8 @@ ndisasm$(X): $(NDISASM) # though, so it isn't necessary to have Perl just to recompile NASM # from the distribution. +insnsb.c: insns.dat insns.pl + $(PERL) $(srcdir)/insns.pl -b $(srcdir)/insns.dat insnsa.c: insns.dat insns.pl $(PERL) $(srcdir)/insns.pl -a $(srcdir)/insns.dat insnsd.c: insns.dat insns.pl @@ -113,7 +115,7 @@ pptok.c: pptok.dat pptok.pl perllib/phash.ph # This target generates all files that require perl. # This allows easier generation of distribution (see dist target). -PERLREQ = macros.c insnsa.c insnsd.c insnsi.h insnsn.c \ +PERLREQ = macros.c insnsb.c insnsa.c insnsd.c insnsi.h insnsn.c \ regs.c regs.h regflags.c regdis.c regvals.c tokhash.c tokens.h \ version.h version.mac pptok.h pptok.c perlreq: $(PERLREQ) @@ -186,10 +188,12 @@ float.$(O): float.c compiler.h float.h insnsi.h nasm.h nasmlib.h regs.h \ version.h hashtbl.$(O): hashtbl.c compiler.h hashtbl.h insnsi.h nasm.h nasmlib.h \ regs.h version.h -insnsa.$(O): insnsa.c compiler.h insns.h insnsi.h nasm.h nasmlib.h regs.h \ - tokens.h version.h -insnsd.$(O): insnsd.c compiler.h insns.h insnsi.h nasm.h nasmlib.h regs.h \ +insnsa.$(O): insnsa.c compiler.h insns.h insnsb.c insnsi.h nasm.h nasmlib.h \ + regs.h tokens.h version.h +insnsb.$(O): insnsb.c compiler.h insns.h insnsi.h nasm.h nasmlib.h regs.h \ tokens.h version.h +insnsd.$(O): insnsd.c compiler.h insns.h insnsb.c insnsi.h nasm.h nasmlib.h \ + regs.h tokens.h version.h insnsn.$(O): insnsn.c labels.$(O): labels.c compiler.h hashtbl.h insnsi.h nasm.h nasmlib.h regs.h \ version.h diff --git a/Mkfiles/netware.mak b/Mkfiles/netware.mak index 6f6e502..639b138 100644 --- a/Mkfiles/netware.mak +++ b/Mkfiles/netware.mak @@ -151,10 +151,12 @@ float.o: float.c compiler.h config.h float.h insnsi.h nasm.h nasmlib.h \ regs.h version.h hashtbl.o: hashtbl.c compiler.h config.h hashtbl.h insnsi.h nasm.h nasmlib.h \ regs.h version.h -insnsa.o: insnsa.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ - regs.h tokens.h version.h -insnsd.o: insnsd.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ +insnsa.o: insnsa.c compiler.h config.h insns.h insnsb.c insnsi.h nasm.h \ + nasmlib.h regs.h tokens.h version.h +insnsb.o: insnsb.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \ regs.h tokens.h version.h +insnsd.o: insnsd.c compiler.h config.h insns.h insnsb.c insnsi.h nasm.h \ + nasmlib.h regs.h tokens.h version.h insnsn.o: insnsn.c labels.o: labels.c compiler.h config.h hashtbl.h insnsi.h nasm.h nasmlib.h \ regs.h version.h diff --git a/Mkfiles/openwcom.mak b/Mkfiles/openwcom.mak index dbbc198..a9cc5c7 100644 --- a/Mkfiles/openwcom.mak +++ b/Mkfiles/openwcom.mak @@ -14,14 +14,14 @@ bindir = $(prefix)\bin mandir = $(prefix)\man CC = wcl386 -DEBUG = +DEBUG = CFLAGS = -6 -ox -wx -ze -fpi $(DEBUG) BUILD_CFLAGS = $(CFLAGS) $(TARGET_FLAGS) # -I$(srcdir)/inttypes INTERNAL_CFLAGS = -I$(srcdir) -I. -DHAVE_SNPRINTF -DHAVE_VSNPRINTF ALL_CFLAGS = $(BUILD_CFLAGS) $(INTERNAL_CFLAGS) LD = $(CC) LDFLAGS = $(ALL_CFLAGS) -LIBS = +LIBS = PERL = perl -I$(srcdir)/perllib STRIP = wstrip @@ -82,6 +82,8 @@ ndisasm$(X): $(NDISASM) # though, so it isn't necessary to have Perl just to recompile NASM # from the distribution. +insnsb.c: insns.dat insns.pl + $(PERL) $(srcdir)/insns.pl -b $(srcdir)/insns.dat insnsa.c: insns.dat insns.pl $(PERL) $(srcdir)/insns.pl -a $(srcdir)/insns.dat insnsd.c: insns.dat insns.pl @@ -137,7 +139,7 @@ pptok.c: pptok.dat pptok.pl perllib/phash.ph # This target generates all files that require perl. # This allows easier generation of distribution (see dist target). -PERLREQ = macros.c insnsa.c insnsd.c insnsi.h insnsn.c & +PERLREQ = macros.c insnsb.c insnsa.c insnsd.c insnsi.h insnsn.c & regs.c regs.h regflags.c regdis.c regvals.c tokhash.c tokens.h & version.h version.mac pptok.h pptok.c perlreq: $(PERLREQ) @@ -212,10 +214,12 @@ float.$(O): float.c compiler.h float.h insnsi.h nasm.h nasmlib.h regs.h & version.h hashtbl.$(O): hashtbl.c compiler.h hashtbl.h insnsi.h nasm.h nasmlib.h & regs.h version.h -insnsa.$(O): insnsa.c compiler.h insns.h insnsi.h nasm.h nasmlib.h regs.h & - tokens.h version.h -insnsd.$(O): insnsd.c compiler.h insns.h insnsi.h nasm.h nasmlib.h regs.h & +insnsa.$(O): insnsa.c compiler.h insns.h insnsb.c insnsi.h nasm.h nasmlib.h & + regs.h tokens.h version.h +insnsb.$(O): insnsb.c compiler.h insns.h insnsi.h nasm.h nasmlib.h regs.h & tokens.h version.h +insnsd.$(O): insnsd.c compiler.h insns.h insnsb.c insnsi.h nasm.h nasmlib.h & + regs.h tokens.h version.h insnsn.$(O): insnsn.c labels.$(O): labels.c compiler.h hashtbl.h insnsi.h nasm.h nasmlib.h regs.h & version.h diff --git a/Mkfiles/owlinux.mak b/Mkfiles/owlinux.mak index aed1188..ce369ed 100644 --- a/Mkfiles/owlinux.mak +++ b/Mkfiles/owlinux.mak @@ -12,7 +12,7 @@ # > building on DOS, Windows, or OS/2, as they share the same C # > library headers. But when cross-compiling from (or to) Linux, it # > is crucial. -# > +# > # > This may be accomplished by setting the INCLUDE env var in the # > makefile, or setting OS2_INCLUDE, DOS_INCLUDE, NT_INCLUDE env vars # > *and* making sure that the proper -bt switch is used, or passing a @@ -28,7 +28,7 @@ bindir = $(prefix)/bin mandir = $(prefix)/man CC = wcl386 -DEBUG = +DEBUG = CFLAGS = -6 -ox -wx -ze -fpi $(DEBUG) BUILD_CFLAGS = $(CFLAGS) $(TARGET_FLAGS) # -I$(srcdir)/inttypes INTERNAL_CFLAGS = -I$(srcdir) -I. \ @@ -36,7 +36,7 @@ INTERNAL_CFLAGS = -I$(srcdir) -I. \ ALL_CFLAGS = $(BUILD_CFLAGS) $(INTERNAL_CFLAGS) LD = $(CC) LDFLAGS = $(ALL_CFLAGS) -LIBS = +LIBS = PERL = perl -I$(srcdir)/perllib STRIP = wstrip @@ -92,6 +92,8 @@ ndisasm$(X): $(NDISASM) # though, so it isn't necessary to have Perl just to recompile NASM # from the distribution. +insnsb.c: insns.dat insns.pl + $(PERL) $(srcdir)/insns.pl -b $(srcdir)/insns.dat insnsa.c: insns.dat insns.pl $(PERL) $(srcdir)/insns.pl -a $(srcdir)/insns.dat insnsd.c: insns.dat insns.pl @@ -147,7 +149,7 @@ pptok.c: pptok.dat pptok.pl perllib/phash.ph # This target generates all files that require perl. # This allows easier generation of distribution (see dist target). -PERLREQ = macros.c insnsa.c insnsd.c insnsi.h insnsn.c \ +PERLREQ = macros.c insnsb.c insnsa.c insnsd.c insnsi.h insnsn.c \ regs.c regs.h regflags.c regdis.c regvals.c tokhash.c tokens.h \ version.h version.mac pptok.h pptok.c perlreq: $(PERLREQ) @@ -222,10 +224,12 @@ float.$(O): float.c compiler.h float.h insnsi.h nasm.h nasmlib.h regs.h \ version.h hashtbl.$(O): hashtbl.c compiler.h hashtbl.h insnsi.h nasm.h nasmlib.h \ regs.h version.h -insnsa.$(O): insnsa.c compiler.h insns.h insnsi.h nasm.h nasmlib.h regs.h \ - tokens.h version.h -insnsd.$(O): insnsd.c compiler.h insns.h insnsi.h nasm.h nasmlib.h regs.h \ +insnsa.$(O): insnsa.c compiler.h insns.h insnsb.c insnsi.h nasm.h nasmlib.h \ + regs.h tokens.h version.h +insnsb.$(O): insnsb.c compiler.h insns.h insnsi.h nasm.h nasmlib.h regs.h \ tokens.h version.h +insnsd.$(O): insnsd.c compiler.h insns.h insnsb.c insnsi.h nasm.h nasmlib.h \ + regs.h tokens.h version.h insnsn.$(O): insnsn.c labels.$(O): labels.c compiler.h hashtbl.h insnsi.h nasm.h nasmlib.h regs.h \ version.h diff --git a/assemble.c b/assemble.c index 56a05e6..104b42c 100644 --- a/assemble.c +++ b/assemble.c @@ -130,8 +130,8 @@ static efunc errfunc; static struct ofmt *outfmt; static ListGen *list; -static int64_t calcsize(int32_t, int64_t, int, insn *, const char *); -static void gencode(int32_t, int64_t, int, insn *, const char *, int64_t); +static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *); +static void gencode(int32_t, int64_t, int, insn *, const uint8_t *, int64_t); static int matches(const struct itemplate *, insn *, int bits); static int32_t regflag(const operand *); static int32_t regval(const operand *); @@ -231,7 +231,7 @@ static void out(int64_t offset, int32_t segto, const void *data, } static int jmp_match(int32_t segment, int64_t offset, int bits, - insn * ins, const char *code) + insn * ins, const uint8_t *code) { int64_t isize; uint8_t c = code[0]; @@ -462,7 +462,7 @@ int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp, m += jmp_match(segment, offset, bits, instruction, temp->code); if (m == 100) { /* matches! */ - const char *codes = temp->code; + const uint8_t *codes = temp->code; int64_t insn_size = calcsize(segment, offset, bits, instruction, codes); itimes = instruction->times; @@ -737,7 +737,7 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp, if (m == 100) { /* we've matched an instruction. */ int64_t isize; - const char *codes = temp->code; + const uint8_t *codes = temp->code; int j; isize = calcsize(segment, offset, bits, instruction, codes); @@ -826,7 +826,7 @@ static bool is_sbyte64(insn * ins, int op) return v32 >= -128 && v32 <= 127; } static int64_t calcsize(int32_t segment, int64_t offset, int bits, - insn * ins, const char *codes) + insn * ins, const uint8_t *codes) { int64_t length = 0; uint8_t c; @@ -1200,7 +1200,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, } static void gencode(int32_t segment, int64_t offset, int bits, - insn * ins, const char *codes, int64_t insn_end) + insn * ins, const uint8_t *codes, int64_t insn_end) { static char condval[] = { /* conditional opcodes */ 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2, diff --git a/insns.h b/insns.h index 35d05b5..49cdc8c 100644 --- a/insns.h +++ b/insns.h @@ -16,7 +16,7 @@ struct itemplate { enum opcode opcode; /* the token, passed from "parser.c" */ int operands; /* number of operands */ opflags_t opd[MAX_OPERANDS]; /* bit flags for operand types */ - const char *code; /* the code it assembles to */ + const uint8_t *code; /* the code it assembles to */ uint32_t flags; /* some flags */ }; diff --git a/insns.pl b/insns.pl index 03c840a..eff70f5 100644 --- a/insns.pl +++ b/insns.pl @@ -17,7 +17,7 @@ print STDERR "Reading insns.dat...\n"; undef $output; foreach $arg ( @ARGV ) { if ( $arg =~ /^\-/ ) { - if ( $arg =~ /^\-([adin])$/ ) { + if ( $arg =~ /^\-([abdin])$/ ) { $output = $1; } else { die "$0: Unknown option: ${arg}\n"; @@ -31,6 +31,7 @@ $fname = "insns.dat" unless $fname = $args[0]; open (F, $fname) || die "unable to open $fname"; %dinstables = (); +@bytecode_list = (); $line = 0; $insns = 0; @@ -68,9 +69,59 @@ while () { close F; +# +# Generate the bytecode array. At this point, @bytecode_list contains +# the full set of bytecodes. +# + +# Sort by descending length +@bytecode_list = sort { scalar(@$b) <=> scalar(@$a) } @bytecode_list; +@bytecode_array = (); +%bytecode_pos = (); +$bytecode_next = 0; +foreach $bl (@bytecode_list) { + my $h = hexstr(@$bl); + next if (defined($bytecode_pos{$h})); + + push(@bytecode_array, $bl); + while ($h ne '') { + $bytecode_pos{$h} = $bytecode_next; + $h = substr($h, 2); + $bytecode_next++; + } +} +undef @bytecode_list; + @opcodes = sort keys(%k_opcodes); @opcodes_cc = sort keys(%k_opcodes_cc); +if ( !defined($output) || $output eq 'b') { + print STDERR "Writing insnsb.c...\n"; + + open B, ">insnsb.c"; + + print B "/* This file auto-generated from insns.dat by insns.pl" . + " - don't edit it */\n\n"; + + print B "#include \"nasm.h\"\n"; + print B "#include \"insns.h\"\n\n"; + + print B "static const uint8_t nasm_bytecodes[$bytecode_next] = {\n"; + + $p = 0; + foreach $bl (@bytecode_array) { + printf B " /* %4d */ ", $p; + foreach $d (@$bl) { + printf B "%#o,", $d; + $p++; + } + printf B "\n"; + } + print B "};\n"; + + close B; +} + if ( !defined($output) || $output eq 'a' ) { print STDERR "Writing insnsa.c...\n"; @@ -78,15 +129,14 @@ if ( !defined($output) || $output eq 'a' ) { print A "/* This file auto-generated from insns.dat by insns.pl" . " - don't edit it */\n\n"; - print A "#include \"nasm.h\"\n"; - print A "#include \"insns.h\"\n"; - print A "\n"; + + print A "#include \"insnsb.c\"\n\n"; foreach $i (@opcodes, @opcodes_cc) { print A "static const struct itemplate instrux_${i}[] = {\n"; $aname = "aa_$i"; foreach $j (@$aname) { - print A " $j\n"; + print A " ", codesubst($j), "\n"; } print A " ITEMPLATE_END\n};\n\n"; } @@ -106,14 +156,13 @@ if ( !defined($output) || $output eq 'd' ) { print D "/* This file auto-generated from insns.dat by insns.pl" . " - don't edit it */\n\n"; - print D "#include \"nasm.h\"\n"; - print D "#include \"insns.h\"\n"; - print D "\n"; + + print D "#include \"insnsb.c\"\n\n"; print D "static const struct itemplate instrux[] = {\n"; $n = 0; foreach $j (@big) { - printf D " /* %4d */ %s\n", $n++, $j; + printf D " /* %4d */ %s\n", $n++, codesubst($j); } print D "};\n"; @@ -230,6 +279,7 @@ printf STDERR "Done: %d instructions\n", $insns; sub format { my ($opcode, $operands, $codes, $flags) = @_; my $num, $nd = 0; + my @bytecode; return (undef, undef) if $operands eq "ignore"; @@ -260,7 +310,29 @@ sub format { $flags =~ s/(\|IF_ND|IF_ND\|)//, $nd = 1 if $flags =~ /IF_ND/; $flags = "IF_" . $flags; - ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd); + @bytecode = (decodify($codes), 0); + push(@bytecode_list, [@bytecode]); + $codes = hexstr(@bytecode); + + ("{I_$opcode, $num, {$operands}, \@\@CODES-$codes\@\@, $flags},", $nd); +} + +# +# Look for @@CODES-xxx@@ sequences and replace them with the appropriate +# offset into nasm_bytecodes +# +sub codesubst($) { + my($s) = @_; + my $n; + + while ($s =~ /\@\@CODES-([0-9A-F]+)\@\@/) { + my $pos = $bytecode_pos{$1}; + if (!defined($pos)) { + die "$0: no position assigned to byte code $1\n"; + } + $s = $` . "nasm_bytecodes+${pos}" . "$'"; + } + return $s; } sub addprefix ($@) { @@ -303,6 +375,17 @@ sub decodify($) { return @codes; } +# Turn a numeric list into a hex string +sub hexstr(@) { + my $s = ''; + my $c; + + foreach $c (@_) { + $s .= sprintf("%02X", $c); + } + return $s; +} + # Here we determine the range of possible starting bytes for a given # instruction. We need only consider the codes: # \1 \2 \3 mean literal bytes, of course -- 2.7.4