2 * Copyright (c) 2011 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
8 * ncdis.c - disassemble using NaCl decoder.
13 #ifndef NACL_TRUSTED_BUT_NOT_TCB
14 #error("This file is not meant for use in the TCB")
23 #include "native_client/src/shared/gio/gio.h"
24 #include "native_client/src/shared/utils/types.h"
25 #include "native_client/src/shared/utils/flags.h"
26 #include "native_client/src/shared/platform/nacl_log.h"
27 #include "native_client/src/trusted/validator/ncfileutil.h"
28 #include "native_client/src/trusted/validator/x86/decoder/nc_inst_state.h"
29 #include "native_client/src/trusted/validator/x86/decoder/ncopcode_desc.h"
30 #include "native_client/src/trusted/validator/x86/decoder/nc_decode_tables.h"
31 #include "native_client/src/trusted/validator/x86/ncval_seg_sfi/ncdecode_verbose.h"
32 #include "native_client/src/trusted/validator/x86/ncval_seg_sfi/ncvalidate_internaltypes.h"
33 #include "native_client/src/trusted/validator_x86/nc_read_segment.h"
34 #include "native_client/src/trusted/validator_x86/ncdis_segments.h"
36 /* True if we should use the full decoder when decoding. */
37 /* TODO(karl): When the full_decoder is working for both the x86-32 and
38 * x86-64 platforms, change to use full decoder for both as default.
40 static Bool NACL_FLAGS_full_decoder =
41 #if NACL_TARGET_SUBARCH == 64
48 /* True if we should use the validator decoder when decoding. */
49 static Bool NACL_FLAGS_validator_decoder =
50 #if NACL_TARGET_SUBARCH == 64
57 /* True if we should print internal representations while decoding. */
58 static Bool NACL_FLAGS_internal = FALSE;
60 /* The name of the executable that is being run. */
61 static const char* exec_name = "???";
/* Writes "Fatal: " followed by the printf-style formatted message (fmt plus
 * variadic arguments) to the stream fp.
 * NOTE(review): the setup of fp/ap and whatever happens after the message is
 * printed (presumably program termination) is not among the visible lines -
 * confirm against the full definition.
 */
63 static void Fatal(const char *fmt, ...) {
66 fprintf(fp, "Fatal: ");
68 vfprintf(fp, fmt, ap);
/* Writes "Info: " followed by the printf-style formatted message (fmt plus
 * variadic arguments) to the stream fp.
 * NOTE(review): unlike Fatal, this function is not declared static; the
 * definition of fp is not among the visible lines - confirm which stream
 * is used.
 */
73 void Info(const char *fmt, ...) {
76 fprintf(fp, "Info: ");
78 vfprintf(fp, fmt, ap);
/* Help text for ncdis: the usage line followed by a tab-indented
 * description for each command line option.
 * NOTE(review): the call that emits this text, and the option heading
 * lines between the descriptions, are not among the visible lines.
 * Fix: the description of the hex text option used the escape "\t"
 * directly followed by "reated", which printed a tab and then "reated";
 * it now reads "\ttreated" like the other tab-indented lines.
 */
82 static void usage(void) {
84 "usage: ncdis [options] [file]\n"
88 "\tAdditional command line arguments are specified in the given\n"
89 "\tfile ('#' acts as a comment character). Use '-' as its value to\n"
90 "\tredirect command line arguments from standard input.\n"
92 "\tDisassemble the elf executable using native client's\n"
95 "\tPrint out this usage message\n"
97 "\tDefine code section as sequence of (textual) hexidecimal bytes\n"
98 "\tdefined in the given file. Lines beginning with '#' will be\n"
99 "\ttreated as comments. If the first non-comment line begins with\n"
100 "\t'@' the following hexidecimal number will be used as the\n"
101 "\tbeginning (RIP/EIP) instruction address of the code segment.\n"
102 "\tUse '-' as its value to redirect standard input as the\n"
103 "\ttext file to process.\n"
105 "\tXXXX specifies the sequence of hexidecimal digits that define\n"
106 "\tan instruction to be decoded.\n"
108 "\tFor the iterator model (only), prints out each the decoded\n"
109 "\tinstruction, followed by the internals for the matched\n"
112 "\tSet program counter (i.e. RIP or EIP) to XXX.\n"
114 "\tProcess input hext_text file in such a way, that it also\n"
115 "\trepresents the output that will be generated by ncdis.\n"
116 "\tThat is, copy comment lines (i.e. lines beginning with\n"
117 "\t'#') to stdout. In addition, it assumes that each line\n"
118 "\tconsists of an '-i' command line argument (and possibly\n"
119 "\ta '--pc' command line argument, followed by a '#',\n"
120 "\tfollowed by the corresponding disassembled text. On such\n"
121 "\tlines, the input is copied up to (and including) the '#'.,\n"
122 "\tand then the disassembled instruction is printed.\n"
123 "--validator_decoder\n"
124 "\tDisassemble the file using the partial instruction decoder used\n"
125 "\tby the validator.\n"
130 /* Converts command line flags to corresponding disassemble flags. */
/* Starts from an empty flag set and adds one NACL_DISASSEMBLE_FLAG bit for
 * each of the NACL_FLAGS_validator_decoder, NACL_FLAGS_full_decoder and
 * NACL_FLAGS_internal settings that is currently true. The three tests are
 * independent of one another, so any combination of bits can result.
 */
131 static NaClDisassembleFlags NaClGetDisassembleFlags(void) {
132 NaClDisassembleFlags flags = 0;
133 if (NACL_FLAGS_validator_decoder) {
134 NaClAddBits(flags, NACL_DISASSEMBLE_FLAG(NaClDisassembleValidatorDecoder));
136 if (NACL_FLAGS_full_decoder) {
137 NaClAddBits(flags, NACL_DISASSEMBLE_FLAG(NaClDisassembleFull));
139 if (NACL_FLAGS_internal) {
140 NaClAddBits(flags, NACL_DISASSEMBLE_FLAG(NaClDisassembleAddInternals));
/* Walks the ncf->shnum section headers of the loaded file. For every
 * section the address, file offset and flags are logged through Info();
 * sections whose flags include SHF_EXECINSTR are then disassembled.
 * NOTE(review): the statement taken for non-executable sections and the
 * return value are not among the visible lines; the caller treats a
 * negative result as failure.
 */
145 static int AnalyzeSections(ncfile *ncf) {
148 const Elf_Shdr* shdr = ncf->sheaders;
150 for (ii = 0; ii < ncf->shnum; ii++) {
/* The header fields are cast to uint32_t to match the %x conversions, so
 * only the low 32 bits of any wider field appear in the log line.
 */
151 Info("section %d sh_addr %x offset %x flags %x\n",
152 ii, (uint32_t)shdr[ii].sh_addr,
153 (uint32_t)shdr[ii].sh_offset, (uint32_t)shdr[ii].sh_flags);
154 if ((shdr[ii].sh_flags & SHF_EXECINSTR) != SHF_EXECINSTR)
156 Info("parsing section %d\n", ii);
/* The section's bytes are located inside ncf->data by the section address
 * relative to ncf->vbase; sh_addr is also passed as the address to
 * disassemble at and sh_size as the number of bytes.
 */
157 NaClDisassembleSegment(ncf->data + (shdr[ii].sh_addr - ncf->vbase),
158 shdr[ii].sh_addr, shdr[ii].sh_size,
159 NaClGetDisassembleFlags());
/* Runs AnalyzeSections() on ncf and, if it returns a negative value,
 * reports "<fname>: text validate failed" on stderr. fname is used only
 * for that message.
 */
164 static void AnalyzeCodeSegments(ncfile *ncf, const char *fname) {
165 if (AnalyzeSections(ncf) < 0) {
166 fprintf(stderr, "%s: text validate failed\n", fname);
170 /* Capture a sequence of bytes defining an instruction (up to a
171 * MAX_BYTES_PER_X86_INSTRUCTION). This sequence is used to run
172 * a (debug) test of the disassembler.
174 static uint8_t FLAGS_decode_instruction[NACL_MAX_BYTES_PER_X86_INSTRUCTION];
176 /* Define the number of bytes supplied for a debug instruction. */
177 static int FLAGS_decode_instruction_size = 0;
179 /* Flag defining the value of the pc to use when decoding an instruction
180 * through decode_instruction.
182 static uint32_t FLAGS_decode_pc = 0;
184 /* Flag defining an input file to use as command line arguments
185 * (one per input line). When specified, run the disassembler
186 * on each command line. The empty string "" denotes that no command
187 * line file was specified. A dash ("-") denotes that standard input
188 * should be used to get command line arguments.
190 static char* FLAGS_commands = "";
192 /* Flag defining the name of a hex text to be used as the code segment. Assumes
193 * that the pc associated with the code segment is defined by
196 static char* FLAGS_hex_text = "";
198 /* Flag, when used in combination with the commands flag, will turn
199 * on input copy rules, making the generated output contain comments
200 * and the command line arguments as part of the corresponding
201 * generated output. For more details on this, see ProcessInputFile
204 static Bool FLAGS_self_document = FALSE;
207 * Store default values of flags on the first call. On subsequent
208 * calls, resets the flags to the default value.
210 * *WARNING* In order for this to work, this function must be
211 * called before GrokFlags
213 * NOTE: we only allow the specification of -use_iter at the top-level
/* Snapshot/restore helper for the per-command-line flags. The DEFAULT_*
 * variables are function-local statics, so the values captured from
 * FLAGS_decode_pc, FLAGS_commands and FLAGS_self_document persist between
 * calls and are written back into those flags.
 * NOTE(review): the test of is_first_call that selects between capturing
 * and restoring is not among the visible lines. FLAGS_hex_text and the
 * NACL_FLAGS_* decoder flags are not touched by any visible line here -
 * confirm that this is intended.
 */
216 static void ResetFlags(void) {
218 static uint32_t DEFAULT_decode_pc;
219 static char* DEFAULT_commands;
220 static Bool DEFAULT_self_document;
221 static Bool is_first_call = TRUE;
/* Capture the current flag values as the defaults. */
223 DEFAULT_decode_pc = FLAGS_decode_pc;
224 DEFAULT_commands = FLAGS_commands;
225 DEFAULT_self_document = FLAGS_self_document;
226 is_first_call = FALSE;
/* Write the captured defaults back into the flags. */
229 FLAGS_decode_pc = DEFAULT_decode_pc;
230 FLAGS_commands = DEFAULT_commands;
231 FLAGS_self_document = DEFAULT_self_document;
232 /* Always clear the decode instruction. */
233 FLAGS_decode_instruction_size = 0;
234 for (i = 0; i < NACL_MAX_BYTES_PER_X86_INSTRUCTION; ++i) {
235 FLAGS_decode_instruction[i] = 0;
239 /* Returns true if all characters in the string are zero. */
240 static Bool IsZero(const char* arg) {
/* Converts the hexadecimal text in hex_value to a byte using strtoul with
 * base 16. A result of zero is accepted only when IsZero(hex_value) holds;
 * otherwise Fatal() is called with an "illegal hex value" message.
 * NOTE(review): strtoul is called with a NULL end pointer, so text with a
 * valid hex prefix followed by other characters (e.g. "1g") is accepted as
 * the prefix value, and the final cast keeps only the low 8 bits of values
 * above 0xFF. Confirm callers never pass more than two hex digits.
 */
250 uint8_t HexToByte(const char* hex_value) {
251 unsigned long value = strtoul(hex_value, NULL, 16);
252 /* Verify that arg is all zeros when zero is returned. Otherwise,
253 * assume that the zero value was due to an error.
255 if (0L == value && !IsZero(hex_value)) {
256 Fatal("-i option specifies illegal hex value '%s'\n", hex_value);
258 return (uint8_t) value;
261 /* Recognizes flags in argv, processes them, and then removes them.
262 * Returns the updated value for argc.
/* Scans argv[1..argc-1]. Arguments recognized as flags update the
 * corresponding FLAGS_* and NACL_FLAGS_* variables; every other argument
 * is copied down to argv[new_argc++], compacting the unrecognized
 * arguments toward the front of argv.
 */
264 int GrokFlags(int argc, const char *argv[]) {
267 char* hex_instruction;
269 if (argc == 0) return 0;
272 for (i = 1; i < argc; ++i) {
273 const char* arg = argv[i];
274 if (GrokUint32HexFlag("--pc", arg, &FLAGS_decode_pc) ||
275 GrokCstringFlag("--commands", arg, &FLAGS_commands) ||
276 GrokCstringFlag("--hex_text", arg, &FLAGS_hex_text) ||
277 GrokBoolFlag("--self_document", arg, &FLAGS_self_document) ||
278 GrokBoolFlag("--internal", arg, &NACL_FLAGS_internal) ||
279 GrokBoolFlag("--help", arg, &help)) {
/* The two decoder flags are kept mutually exclusive: setting either one
 * assigns the negated value to the other.
 */
281 } else if (GrokBoolFlag("--validator_decoder", arg,
282 &NACL_FLAGS_validator_decoder)) {
283 NACL_FLAGS_full_decoder = !NACL_FLAGS_validator_decoder;
284 } else if (GrokBoolFlag("--full_decoder", arg,
285 &NACL_FLAGS_full_decoder)) {
286 NACL_FLAGS_validator_decoder = !NACL_FLAGS_full_decoder;
/* -i: the hex text is consumed through a small buffer, each buffer-full is
 * converted with HexToByte() and appended to FLAGS_decode_instruction.
 */
287 } else if (GrokCstringFlag("-i", arg, &hex_instruction)) {
290 char* buf = &(hex_instruction[0]);
293 buffer[i++] = *(buf++);
295 uint8_t byte = HexToByte(buffer);
/* NOTE(review): the byte is stored before the size check below. When the
 * array already holds NACL_MAX_BYTES_PER_X86_INSTRUCTION bytes, the next
 * byte is written at index NACL_MAX_BYTES_PER_X86_INSTRUCTION - one past
 * the end of FLAGS_decode_instruction - before Fatal() is reached.
 * Performing the check before the store would avoid that write.
 */
296 FLAGS_decode_instruction[FLAGS_decode_instruction_size++] = byte;
297 if (FLAGS_decode_instruction_size >
298 NACL_MAX_BYTES_PER_X86_INSTRUCTION) {
299 Fatal("-i=%s specifies too long of a hex value\n", hex_instruction);
305 Fatal("-i=%s doesn't specify a sequence of bytes\n", hex_instruction);
/* Not a recognized flag: keep it for the caller. */
308 argv[new_argc++] = argv[i];
314 /* Process the command line arguments. */
/* Returns the string that ProcessCommandLine() uses as the name of the
 * file to load.
 * NOTE(review): only the "no filename specified" failure path is among the
 * visible lines; the condition guarding it and the returned element are
 * not - confirm against the full definition.
 */
315 static const char* GrokArgv(int argc, const char* argv[]) {
317 Fatal("no filename specified\n");
322 static void ProcessCommandLine(int argc, const char* argv[]);
324 /* Defines the maximum number of characters allowed on an input line
325 * of the input text defined by the commands command line option.
327 #define MAX_INPUT_LINE 4096
329 /* Defines the characters used as (token) separators to recognize command
330 * line arguments when processing lines of text in the text file specified
331 * by the commands command line option.
333 #define CL_SEPARATORS " \t\n"
335 /* Copies the text from the input line (which should be command line options),
336 * up to any trailing comments (i.e. the pound sign).
337 * input_line - The line of text to process.
338 * tokens - The extracted text from the input_line.
339 * max_length - The maximum length of input_line and tokens.
341 * Note: If input_line doesn't end with a null terminator, one is automatically
/* Examines at most max_length characters. When the last index is reached,
 * a terminator is written into input_line itself, so the caller's input
 * buffer is modified in that case. A '\0' character ends the scan, and
 * token_text receives a terminator at the index where copying stops.
 * NOTE(review): the remaining parameters and the statement that copies
 * each character into token_text are not among the visible lines.
 */
344 static void CopyCommandLineTokens(char* input_line,
348 for (i = 0; i < max_length; ++i) {
350 if (max_length == i + 1) {
351 /* Be sure we end the string with a null terminator. */
352 input_line[i] = '\0';
356 if (ch == '\0') return;
358 token_text[i] = '\0';
364 /* Tokenize the given text to find command line arguments, and
365 * add them to the given list of command line arguments.
367 * *WARNING* This function will (destructively) modify the
368 * contents of token_text, by converting command line option
369 * separator characters into null characters (as done by strtok).
/* Splits token_text on the CL_SEPARATORS characters with strtok() and
 * appends a pointer to each token to argv, incrementing *argc per token.
 * The stored pointers point into token_text, so that buffer must stay
 * alive and unmodified for as long as argv is used.
 * NOTE(review): no upper bound on *argc is checked among the visible
 * lines, and strtok() keeps hidden state between calls, so tokenizing must
 * not be interleaved with another strtok() user.
 */
371 static void ExtractTokensAndAddToArgv(
374 const char* argv[]) {
375 /* Note: Assume that each command line argument corresponds to
376 * non-blank text, which is a HACK, but should be sufficient for
379 char* token = strtok(token_text, CL_SEPARATORS);
380 while (token != NULL) {
381 argv[(*argc)++] = token;
382 token = strtok(NULL, CL_SEPARATORS);
386 /* Print out the contents of text, up to the first occurrence of the
/* Writes characters of text one at a time to the Gio stream returned by
 * NaClLogGetGio(), looking at no more than MAX_INPUT_LINE characters.
 * NOTE(review): the tests that decide which of the two gprintf calls runs
 * and when the loop stops are not among the visible lines.
 */
389 static void PrintUpToPound(const char text[]) {
391 struct Gio* g = NaClLogGetGio();
392 for (i = 0; i < MAX_INPUT_LINE; ++i) {
396 gprintf(g, "%c", ch);
401 gprintf(g, "%c", ch);
407 /* Reads the given text file and processes the command line options specified
408 * inside of it. Each line specifies a separate sequence of command line
409 * arguments to process.
412 * (a) The '#' is used as a comment delimiter.
413 * (b) whitespace lines are ignored.
414 * (c) If flag --self_document is specified, comment lines and whitespace
415 * lines will automatically be copied to stdout. In addition, command
416 * line arguments will be copied to stdout before processing them.
417 * Further, if the command line arguments are followed by a comment,
418 * only text up to (and including) the '#' will be copied. This allows
419 * the input file to contain the (hopefully single lined) output that
420 * would be generated by the given command line arguments. Therefore,
421 * if set up correctly, the output of the disassembler (in this case)
422 * should be the same as the input file (making it easy to use the
423 * input file as the corresponding GOLD file to test against).
/* Reads file one line at a time with fgets() into a MAX_INPUT_LINE sized
 * buffer (a longer physical line is therefore delivered over several
 * iterations). For each line an argument vector is built whose first entry
 * is exec_name; lines that yield further arguments are handed to
 * ProcessCommandLine(), lines that do not are handled by the
 * "no command line arguments" branch.
 * FLAGS_self_document is read once, before the first line is processed,
 * and that copy is used for the whole file - presumably because processing
 * a line can change the flag; confirm.
 */
425 static void ProcessInputFile(FILE* file) {
426 char input_line[MAX_INPUT_LINE];
427 const Bool self_document = FLAGS_self_document;
428 while (fgets(input_line, MAX_INPUT_LINE, file) != NULL) {
429 char token_text[MAX_INPUT_LINE];
430 const char* line_argv[MAX_INPUT_LINE];
433 /* Copy the input line (up to the first #) into token_text */
434 CopyCommandLineTokens(input_line, token_text, MAX_INPUT_LINE);
436 /* Tokenize the commands to build argv.
437 * Note: Since each token is separated by a blank,
438 * and the input is no more than MAX_INPUT_LINE,
439 * we know (without checking) that line_argc
440 * will not exceed MAX_INPUT_LINE.
442 line_argv[line_argc++] = exec_name;
443 ExtractTokensAndAddToArgv(token_text, &line_argc, line_argv);
445 /* Process the parsed input line. */
446 if (1 == line_argc) {
447 /* No command line arguments. */
449 printf("%s", input_line);
452 /* Process the tokenized command line. */
454 PrintUpToPound(input_line);
456 ProcessCommandLine(line_argc, line_argv);
462 /* Run the disassembler using the given command line arguments. */
/* Parses argv with GrokFlags() and then selects exactly one mode, tested
 * in this order:
 *   1. FLAGS_decode_instruction_size > 0 (bytes given with -i): the bytes
 *      are disassembled at FLAGS_decode_pc.
 *   2. FLAGS_hex_text is non-empty: up to MAX_INPUT_LINE bytes and a pc
 *      are read with NaClReadHexTextWithPc() from stdin (value "-") or
 *      from the named file, then disassembled.
 *   3. FLAGS_commands is non-empty: the command lines in stdin (value "-")
 *      or in the named file are run through ProcessInputFile(), which in
 *      turn calls this function once per command line.
 *   4. Otherwise: GrokArgv() supplies a file name, the file is loaded with
 *      nc_loadfile_depending() and passed to AnalyzeCodeSegments().
 * Failure to open an input file or to load the executable is reported
 * through Fatal().
 * NOTE(review): the "processing %s" Info message has no trailing newline,
 * unlike the other Info messages in this file; the statements that close
 * the opened input files are not among the visible lines - confirm both.
 */
463 static void ProcessCommandLine(int argc, const char* argv[]) {
467 new_argc = GrokFlags(argc, argv);
468 if (FLAGS_decode_instruction_size > 0) {
469 /* Command line options specify an instruction to decode, run
470 * the disassembler on the instruction to print out the decoded
474 Fatal("unrecognized option '%s'\n", argv[1]);
476 NaClDisassembleSegment(FLAGS_decode_instruction, FLAGS_decode_pc,
477 FLAGS_decode_instruction_size,
478 NaClGetDisassembleFlags());
479 } else if (0 != strcmp(FLAGS_hex_text, "")) {
480 uint8_t bytes[MAX_INPUT_LINE];
483 if (0 == strcmp(FLAGS_hex_text, "-")) {
484 num_bytes = NaClReadHexTextWithPc(stdin, &pc, bytes, MAX_INPUT_LINE);
485 NaClDisassembleSegment(bytes, pc, (NaClMemorySize) num_bytes,
486 NaClGetDisassembleFlags());
488 FILE* input = fopen(FLAGS_hex_text, "r");
490 Fatal("Can't open hex text file: %s\n", FLAGS_hex_text);
492 num_bytes = NaClReadHexTextWithPc(input, &pc, bytes, MAX_INPUT_LINE);
494 NaClDisassembleSegment(bytes, pc, (NaClMemorySize) num_bytes,
495 NaClGetDisassembleFlags());
497 } else if (0 != strcmp(FLAGS_commands, "")) {
498 /* Use the given input file to find command line arguments,
501 if (0 == strcmp(FLAGS_commands, "-")) {
502 ProcessInputFile(stdin);
504 FILE* input = fopen(FLAGS_commands, "r");
506 Fatal("Can't open commands file: %s\n", FLAGS_commands);
508 ProcessInputFile(input);
512 /* Command line should specify an executable to disassemble.
513 * Read the file and disassemble it.
516 const char* filename = GrokArgv(new_argc, argv);
518 Info("processing %s", filename);
519 ncf = nc_loadfile_depending(filename, NULL);
521 Fatal("nc_loadfile(%s): %s\n", filename, strerror(errno));
524 AnalyzeCodeSegments(ncf, filename);
/* Program entry point. Wraps stdout in a GioFile (reporting on stderr if
 * that fails), passes it together with LOG_INFO to
 * NaClLogModuleInitExtended(), and then hands the unmodified argc/argv to
 * ProcessCommandLine().
 * NOTE(review): the statements following the GioFileRefCtor failure
 * message and the return value are not among the visible lines.
 */
530 int main(int argc, const char *argv[]) {
531 struct GioFile gout_file;
532 struct Gio* gout = (struct Gio*) &gout_file;
533 if (!GioFileRefCtor(&gout_file, stdout)) {
534 fprintf(stderr, "Unable to create gio file for stdout!\n");
537 NaClLogModuleInitExtended(LOG_INFO, gout);
538 ProcessCommandLine(argc, argv);