From 2fb033af18c55d5e4412507b9a1bd513e8a6f2ff Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 21 May 2008 11:05:39 -0700 Subject: [PATCH] Disassembler: select table based on VEX prefixes We can use the new VEX prefixes to select into a large table of new opcode spaces. Since the table is (currently) sparse, add logic so we don't end up producing tons of empty tables for no good reason. This is also necessary since VEX is likely to reuse opcode bytes that would appear as prefixes at some point, which would cause conflicts with the regular tables. --- disasm.c | 15 ++++++++++++++- insns.h | 1 + insns.pl | 46 ++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/disasm.c b/disasm.c index 95fe2ad..f4cc6c6 100644 --- a/disasm.c +++ b/disasm.c @@ -1021,6 +1021,8 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, segover = NULL; origdata = data; + ix = itable; + end_prefix = false; while (!end_prefix) { switch (*data) { @@ -1028,9 +1030,11 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, case 0xF3: prefix.rep = *data++; break; + case 0xF0: prefix.lock = *data++; break; + case 0x2E: segover = "cs", prefix.seg = *data++; break; @@ -1049,6 +1053,7 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, case 0x65: segover = "gs", prefix.seg = *data++; break; + case 0x66: prefix.osize = (segsize == 16) ? 32 : 16; prefix.osp = *data++; @@ -1057,6 +1062,7 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, prefix.asize = (segsize == 32) ? 16 : 32; prefix.asp = *data++; break; + case 0xC4: case 0xC5: if (segsize == 64 || (data[1] & 0xc0) == 0xc0) { @@ -1078,8 +1084,11 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, prefix.vex_v = (~prefix.vex[1] >> 3) & 15; prefix.vex_lp = prefix.vex[1] & 7; } + + ix = itable_VEX[prefix.vex_m][prefix.vex_lp]; end_prefix = true; break; + case REX_P + 0x0: case REX_P + 0x1: case REX_P + 0x2: @@ -1103,6 +1112,7 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, } end_prefix = true; break; + default: end_prefix = true; break; @@ -1113,8 +1123,11 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize, best_p = NULL; best_pref = INT_MAX; + if (!ix) + return 0; /* No instruction table at all... */ + dp = data; - ix = itable + *dp++; + ix += *dp++; while (ix->n == -1) { ix = (const struct disasm_index *)ix->p + *dp++; } diff --git a/insns.h b/insns.h index 4f954a5..114b1ca 100644 --- a/insns.h +++ b/insns.h @@ -32,6 +32,7 @@ struct disasm_index { /* Tables for the assembler and disassembler, respectively */ extern const struct itemplate * const nasm_instructions[]; extern const struct disasm_index itable[256]; +extern const struct disasm_index * const itable_VEX[32][8]; /* Common table for the byte codes */ extern const uint8_t nasm_bytecodes[]; diff --git a/insns.pl b/insns.pl index bbb1702..594eec1 100644 --- a/insns.pl +++ b/insns.pl @@ -14,6 +14,15 @@ # This should match MAX_OPERANDS from nasm.h $MAX_OPERANDS = 5; +# Add VEX prefixes +@vexlist = (); +for ($m = 0; $m < 32; $m++) { + for ($lp = 0; $lp < 8; $lp++) { + push(@vexlist, sprintf("VEX%02X%01X", $m, $lp)); + } +} +@disasm_prefixes = (@vexlist, @disasm_prefixes); + print STDERR "Reading insns.dat...\n"; @args = (); @@ -182,8 +191,20 @@ if ( !defined($output) || $output eq 'd' ) { print D "};\n"; } + @prefix_list = (); foreach $h (@disasm_prefixes, '') { - $is_prefix{$h} = 1; + for ($c = 0; $c < 256; $c++) { + $nn = sprintf("%s%02X", $h, $c); + if ($is_prefix{$nn} || defined($dinstables{$nn})) { + # At least one entry in this prefix table + push(@prefix_list, $h); + $is_prefix{$h} = 1; + last; + } + } + } + + foreach $h (@prefix_list) { print D "\n"; print D "static " unless ($h eq ''); print D "const struct disasm_index "; @@ -202,8 +223,23 @@ if ( !defined($output) || $output eq 'd' ) { printf D " { NULL, 0 },\n"; } } - print D "};\n"; + print D "};\n"; + } + + print D "\nconst struct disasm_index * const itable_VEX[32][8] = {\n"; + for ($m = 0; $m < 32; $m++) { + print D "\t{\n"; + for ($lp = 0; $lp < 8; $lp++) { + $vp = sprintf("VEX%02X%01X", $m, $lp); + if ($is_prefix{$vp}) { + printf D "\t\titable_%s,\n", $vp; + } else { + print D "\t\tNULL,\n"; + } + } + print D "\t},\n"; } + print D "};\n"; close D; } @@ -464,8 +500,10 @@ sub startseq($) { } elsif ($c0 == 0 || $c0 == 0340) { return $prefix; } elsif (($c0 & ~3) == 0260 || $c0 == 0270) { - shift(@codes); # Skip VEX control bytes - shift(@codes); + my $m,$wlp,$vxp; + $m = shift(@codes); + $wlp = shift(@codes); + $prefix .= sprintf('VEX%02X%01X', $m, $wlp & 7); } elsif ($c0 >= 0172 && $c0 <= 174) { shift(@codes); # Skip is4 control byte } else { -- 2.7.4