src/tools/traceline/traceline/sidestep/mini_disassembler.cc

   1 // Copyright 2008, Google Inc.
   2 // All rights reserved.
   3 //
   4 // Redistribution and use in source and binary forms, with or without
   5 // modification, are permitted provided that the following conditions are
   6 // met:
   7 //
   8 //    * Redistributions of source code must retain the above copyright
   9 // notice, this list of conditions and the following disclaimer.
  10 //    * Redistributions in binary form must reproduce the above
  11 // copyright notice, this list of conditions and the following disclaimer
  12 // in the documentation and/or other materials provided with the
  13 // distribution.
  14 //    * Neither the name of Google Inc. nor the names of its
  15 // contributors may be used to endorse or promote products derived from
  16 // this software without specific prior written permission.
  17 //
  18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30 // Implementation of MiniDisassembler.
  31
  32 #include "sidestep/mini_disassembler.h"
  33
  34 namespace sidestep {
  35
  36 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
  37                                    bool address_default_is_32_bits)
  38     : operand_default_is_32_bits_(operand_default_is_32_bits),
  39       address_default_is_32_bits_(address_default_is_32_bits) {
  40   Initialize();
  41 }
  42
  43 MiniDisassembler::MiniDisassembler()
  44     : operand_default_is_32_bits_(true),
  45       address_default_is_32_bits_(true) {
  46   Initialize();
  47 }
  48
  49 InstructionType MiniDisassembler::Disassemble(
  50     unsigned char* start_byte,
  51     unsigned int* instruction_bytes) {
  52   // Clean up any state from previous invocations.
  53   Initialize();
  54
  55   // Start by processing any prefixes.
  56   unsigned char* current_byte = start_byte;
  57   unsigned int size = 0;
  58   InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
  59
  60   if (IT_UNKNOWN == instruction_type)
  61     return instruction_type;
  62
  63   current_byte += size;
  64   size = 0;
  65
  66   // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
  67   // and address_is_32_bits_ flags are correctly set.
  68
  69   instruction_type = ProcessOpcode(current_byte, 0, &size);
  70
  71   // Check for error processing instruction
  72   if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
  73     return IT_UNKNOWN;
  74   }
  75
  76   current_byte += size;
  77
  78   // Invariant: operand_bytes_ indicates the total size of operands
  79   // specified by the opcode and/or ModR/M byte and/or SIB byte.
  80   // pCurrentByte points to the first byte after the ModR/M byte, or after
  81   // the SIB byte if it is present (i.e. the first byte of any operands
  82   // encoded in the instruction).
  83
  84   // We get the total length of any prefixes, the opcode, and the ModR/M and
  85   // SIB bytes if present, by taking the difference of the original starting
  86   // address and the current byte (which points to the first byte of the
  87   // operands if present, or to the first byte of the next instruction if
  88   // they are not).  Adding the count of bytes in the operands encoded in
  89   // the instruction gives us the full length of the instruction in bytes.
  90   *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
  91
  92   // Return the instruction type, which was set by ProcessOpcode().
  93   return instruction_type_;
  94 }
  95
  96 void MiniDisassembler::Initialize() {
  97   operand_is_32_bits_ = operand_default_is_32_bits_;
  98   address_is_32_bits_ = address_default_is_32_bits_;
  99   operand_bytes_ = 0;
 100   have_modrm_ = false;
 101   should_decode_modrm_ = false;
 102   instruction_type_ = IT_UNKNOWN;
 103   got_f2_prefix_ = false;
 104   got_f3_prefix_ = false;
 105   got_66_prefix_ = false;
 106 }
 107
 108 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
 109                                                   unsigned int* size) {
 110   InstructionType instruction_type = IT_GENERIC;
 111   const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
 112
 113   switch (opcode.type_) {
 114     case IT_PREFIX_ADDRESS:
 115       address_is_32_bits_ = !address_default_is_32_bits_;
 116       goto nochangeoperand;
 117     case IT_PREFIX_OPERAND:
 118       operand_is_32_bits_ = !operand_default_is_32_bits_;
 119       nochangeoperand:
 120     case IT_PREFIX:
 121
 122       if (0xF2 == (*start_byte))
 123         got_f2_prefix_ = true;
 124       else if (0xF3 == (*start_byte))
 125         got_f3_prefix_ = true;
 126       else if (0x66 == (*start_byte))
 127         got_66_prefix_ = true;
 128
 129       instruction_type = opcode.type_;
 130       (*size)++;
 131       // we got a prefix, so add one and check next byte
 132       ProcessPrefixes(start_byte + 1, size);
 133     default:
 134       break;   // not a prefix byte
 135   }
 136
 137   return instruction_type;
 138 }
 139
 140 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
 141                                                 unsigned int table_index,
 142                                                 unsigned int* size) {
 143   const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
 144   unsigned char current_byte = (*start_byte) >> table.shift_;
 145   current_byte = current_byte & table.mask_;  // Mask out the bits we will use
 146
 147   // Check whether the byte we have is inside the table we have.
 148   if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
 149     instruction_type_ = IT_UNKNOWN;
 150     return instruction_type_;
 151   }
 152
 153   const Opcode& opcode = table.table_[current_byte];
 154   if (IT_UNUSED == opcode.type_) {
 155     // This instruction is not used by the IA-32 ISA, so we indicate
 156     // this to the user.  Probably means that we were pointed to
 157     // a byte in memory that was not the start of an instruction.
 158     instruction_type_ = IT_UNUSED;
 159     return instruction_type_;
 160   } else if (IT_REFERENCE == opcode.type_) {
 161     // We are looking at an opcode that has more bytes (or is continued
 162     // in the ModR/M byte).  Recursively find the opcode definition in
 163     // the table for the opcode's next byte.
 164     (*size)++;
 165     ProcessOpcode(start_byte + 1, opcode.table_index_, size);
 166     return instruction_type_;
 167   }
 168
 169   const SpecificOpcode* specific_opcode = reinterpret_cast<
 170                                               const SpecificOpcode*>(&opcode);
 171   if (opcode.is_prefix_dependent_) {
 172     if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
 173       specific_opcode = &opcode.opcode_if_f2_prefix_;
 174     } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
 175       specific_opcode = &opcode.opcode_if_f3_prefix_;
 176     } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
 177       specific_opcode = &opcode.opcode_if_66_prefix_;
 178     }
 179   }
 180
 181   // Inv: The opcode type is known.
 182   instruction_type_ = specific_opcode->type_;
 183
 184   // Let's process the operand types to see if we have any immediate
 185   // operands, and/or a ModR/M byte.
 186
 187   ProcessOperand(specific_opcode->flag_dest_);
 188   ProcessOperand(specific_opcode->flag_source_);
 189   ProcessOperand(specific_opcode->flag_aux_);
 190
 191   // Inv: We have processed the opcode and incremented operand_bytes_
 192   // by the number of bytes of any operands specified by the opcode
 193   // that are stored in the instruction (not registers etc.).  Now
 194   // we need to return the total number of bytes for the opcode and
 195   // for the ModR/M or SIB bytes if they are present.
 196
 197   if (table.mask_ != 0xff) {
 198     if (have_modrm_) {
 199       // we're looking at a ModR/M byte so we're not going to
 200       // count that into the opcode size
 201       ProcessModrm(start_byte, size);
 202       return IT_GENERIC;
 203     } else {
 204       // need to count the ModR/M byte even if it's just being
 205       // used for opcode extension
 206       (*size)++;
 207       return IT_GENERIC;
 208     }
 209   } else {
 210     if (have_modrm_) {
 211       // The ModR/M byte is the next byte.
 212       (*size)++;
 213       ProcessModrm(start_byte + 1, size);
 214       return IT_GENERIC;
 215     } else {
 216       (*size)++;
 217       return IT_GENERIC;
 218     }
 219   }
 220 }
 221
 222 bool MiniDisassembler::ProcessOperand(int flag_operand) {
 223   bool succeeded = true;
 224   if (AM_NOT_USED == flag_operand)
 225     return succeeded;
 226
 227   // Decide what to do based on the addressing mode.
 228   switch (flag_operand & AM_MASK) {
 229     // No ModR/M byte indicated by these addressing modes, and no
 230     // additional (e.g. immediate) parameters.
 231     case AM_A:  // Direct address
 232     case AM_F:  // EFLAGS register
 233     case AM_X:  // Memory addressed by the DS:SI register pair
 234     case AM_Y:  // Memory addressed by the ES:DI register pair
 235     case AM_IMPLICIT:  // Parameter is implicit, occupies no space in
 236                        // instruction
 237       break;
 238
 239     // There is a ModR/M byte but it does not necessarily need
 240     // to be decoded.
 241     case AM_C:  // reg field of ModR/M selects a control register
 242     case AM_D:  // reg field of ModR/M selects a debug register
 243     case AM_G:  // reg field of ModR/M selects a general register
 244     case AM_P:  // reg field of ModR/M selects an MMX register
 245     case AM_R:  // mod field of ModR/M may refer only to a general register
 246     case AM_S:  // reg field of ModR/M selects a segment register
 247     case AM_T:  // reg field of ModR/M selects a test register
 248     case AM_V:  // reg field of ModR/M selects a 128-bit XMM register
 249       have_modrm_ = true;
 250       break;
 251
 252     // In these addressing modes, there is a ModR/M byte and it needs to be
 253     // decoded. No other (e.g. immediate) params than indicated in ModR/M.
 254     case AM_E:  // Operand is either a general-purpose register or memory,
 255                 // specified by ModR/M byte
 256     case AM_M:  // ModR/M byte will refer only to memory
 257     case AM_Q:  // Operand is either an MMX register or memory (complex
 258                 // evaluation), specified by ModR/M byte
 259     case AM_W:  // Operand is either a 128-bit XMM register or memory (complex
 260                 // eval), specified by ModR/M byte
 261       have_modrm_ = true;
 262       should_decode_modrm_ = true;
 263       break;
 264
 265     // These addressing modes specify an immediate or an offset value
 266     // directly, so we need to look at the operand type to see how many
 267     // bytes.
 268     case AM_I:  // Immediate data.
 269     case AM_J:  // Jump to offset.
 270     case AM_O:  // Operand is at offset.
 271       switch (flag_operand & OT_MASK) {
 272         case OT_B:  // Byte regardless of operand-size attribute.
 273           operand_bytes_ += OS_BYTE;
 274           break;
 275         case OT_C:  // Byte or word, depending on operand-size attribute.
 276           if (operand_is_32_bits_)
 277             operand_bytes_ += OS_WORD;
 278           else
 279             operand_bytes_ += OS_BYTE;
 280           break;
 281         case OT_D:  // Doubleword, regardless of operand-size attribute.
 282           operand_bytes_ += OS_DOUBLE_WORD;
 283           break;
 284         case OT_DQ:  // Double-quadword, regardless of operand-size attribute.
 285           operand_bytes_ += OS_DOUBLE_QUAD_WORD;
 286           break;
 287         case OT_P:  // 32-bit or 48-bit pointer, depending on operand-size
 288                     // attribute.
 289           if (operand_is_32_bits_)
 290             operand_bytes_ += OS_48_BIT_POINTER;
 291           else
 292             operand_bytes_ += OS_32_BIT_POINTER;
 293           break;
 294         case OT_PS:  // 128-bit packed single-precision floating-point data.
 295           operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
 296           break;
 297         case OT_Q:  // Quadword, regardless of operand-size attribute.
 298           operand_bytes_ += OS_QUAD_WORD;
 299           break;
 300         case OT_S:  // 6-byte pseudo-descriptor.
 301           operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
 302           break;
 303         case OT_SD:  // Scalar Double-Precision Floating-Point Value
 304         case OT_PD:  // Unaligned packed double-precision floating point value
 305           operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
 306           break;
 307         case OT_SS:
 308           // Scalar element of a 128-bit packed single-precision
 309           // floating data.
 310           // We simply return enItUnknown since we don't have to support
 311           // floating point
 312           succeeded = false;
 313           break;
 314         case OT_V:  // Word or doubleword, depending on operand-size attribute.
 315           if (operand_is_32_bits_)
 316             operand_bytes_ += OS_DOUBLE_WORD;
 317           else
 318             operand_bytes_ += OS_WORD;
 319           break;
 320         case OT_W:  // Word, regardless of operand-size attribute.
 321           operand_bytes_ += OS_WORD;
 322           break;
 323
 324         // Can safely ignore these.
 325         case OT_A:  // Two one-word operands in memory or two double-word
 326                     // operands in memory
 327         case OT_PI:  // Quadword MMX technology register (e.g. mm0)
 328         case OT_SI:  // Doubleword integer register (e.g., eax)
 329           break;
 330
 331         default:
 332           break;
 333       }
 334       break;
 335
 336     default:
 337       break;
 338   }
 339
 340   return succeeded;
 341 }
 342
 343 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
 344                                     unsigned int* size) {
 345   // If we don't need to decode, we just return the size of the ModR/M
 346   // byte (there is never a SIB byte in this case).
 347   if (!should_decode_modrm_) {
 348     (*size)++;
 349     return true;
 350   }
 351
 352   // We never care about the reg field, only the combination of the mod
 353   // and r/m fields, so let's start by packing those fields together into
 354   // 5 bits.
 355   unsigned char modrm = (*start_byte);
 356   unsigned char mod = modrm & 0xC0;  // mask out top two bits to get mod field
 357   modrm = modrm & 0x07;  // mask out bottom 3 bits to get r/m field
 358   mod = mod >> 3;  // shift the mod field to the right place
 359   modrm = mod | modrm;  // combine the r/m and mod fields as discussed
 360   mod = mod >> 3;  // shift the mod field to bits 2..0
 361
 362   // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
 363   // in bits 2..0, and mod contains the mod field in bits 2..0
 364
 365   const ModrmEntry* modrm_entry = 0;
 366   if (address_is_32_bits_)
 367     modrm_entry = &s_ia32_modrm_map_[modrm];
 368   else
 369     modrm_entry = &s_ia16_modrm_map_[modrm];
 370
 371   // Invariant: modrm_entry points to information that we need to decode
 372   // the ModR/M byte.
 373
 374   // Add to the count of operand bytes, if the ModR/M byte indicates
 375   // that some operands are encoded in the instruction.
 376   if (modrm_entry->is_encoded_in_instruction_)
 377     operand_bytes_ += modrm_entry->operand_size_;
 378
 379   // Process the SIB byte if necessary, and return the count
 380   // of ModR/M and SIB bytes.
 381   if (modrm_entry->use_sib_byte_) {
 382     (*size)++;
 383     return ProcessSib(start_byte + 1, mod, size);
 384   } else {
 385     (*size)++;
 386     return true;
 387   }
 388 }
 389
 390 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
 391                                   unsigned char mod,
 392                                   unsigned int* size) {
 393   // get the mod field from the 2..0 bits of the SIB byte
 394   unsigned char sib_base = (*start_byte) & 0x07;
 395   if (0x05 == sib_base) {
 396     switch (mod) {
 397       case 0x00:  // mod == 00
 398       case 0x02:  // mod == 10
 399         operand_bytes_ += OS_DOUBLE_WORD;
 400         break;
 401       case 0x01:  // mod == 01
 402         operand_bytes_ += OS_BYTE;
 403         break;
 404       case 0x03:  // mod == 11
 405         // According to the IA-32 docs, there does not seem to be a disp
 406         // value for this value of mod
 407       default:
 408         break;
 409     }
 410   }
 411
 412   (*size)++;
 413   return true;
 414 }
 415
 416 };  // namespace sidestep