1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
6 // - Make capturing system call arguments optional and the number configurable.
7 // - Lots of places depend on the ABI so that we can modify EAX or EDX, this
8 // is safe, but these could be moved to be saved and restored anyway.
9 // - Understand the loader better, and make some more meaningful hooks with
10 // proper data collection and durations. Right now it's just noise.
11 // - Get the returned pointer from AllocateHeap.
20 #include "assembler.h"
23 #include "sym_resolver.h"
24 #include "syscall_map.h"
26 #include "sidestep/mini_disassembler.h"
30 std::string JSONString(const std::string& str) {
31 static const char hextable[] = "0123456789abcdef";
34 for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
35 unsigned char c = static_cast<unsigned char>(*it);
40 out.push_back('\\'); out.push_back(c);
43 if (c < 20 || c >= 127) {
44 out.push_back('\\'); out.push_back('x');
45 out.push_back(hextable[c >> 4]); out.push_back(hextable[c & 0xf]);
61 static const int kPlaygroundSize = 64 * 1024 * 1024;
63 // Encapsulate the configuration options to the playground.
67 : stack_unwind_depth_(0),
73 // The maximum amount of frames we should unwind from the call stack.
74 int stack_unwind_depth() { return stack_unwind_depth_; }
75 void set_stack_unwind_depth(int depth) { stack_unwind_depth_ = depth; }
77 // Whether we should log heap operations (alloc / free).
78 bool log_heap() { return log_heap_; }
79 void set_log_heap(bool x) { log_heap_ = x; }
81 // Whether we should log lock (critical section) operations.
82 bool log_lock() { return log_lock_; }
83 void set_log_lock(bool x) { log_lock_ = x; }
85 // Whether we are running on Vista.
86 bool vista() { return vista_; }
87 void set_vista(bool x) { vista_ = x; }
90 int stack_unwind_depth_;
96 Playground(HANDLE proc, const Options& options)
99 resolver_("ntdll.dll"),
101 // We copy the entire playground into the remote process, and we have
102 // fields that we expect to be zero. TODO this could be a lot better.
103 memset(buf_, 0, sizeof(buf_));
106 void AllocateInRemote() {
107 // Try to get something out of the way and easy to debug.
108 static void* kPlaygroundAddr = reinterpret_cast<void*>(0x66660000);
109 // Allocate our playground memory in the target process. This is a big
110 // slab of read/write/execute memory that we use for our code
111 // instrumentation, and the memory for writing out our logging events.
112 remote_addr_ = reinterpret_cast<char*>(
113 VirtualAllocEx(proc_,
116 MEM_COMMIT | MEM_RESERVE,
117 PAGE_EXECUTE_READWRITE));
118 if (remote_addr_ == NULL || remote_addr_ != kPlaygroundAddr) {
119 NOTREACHED("Falied to allocate playground: 0x%08x", remote_addr_);
123 void CopyToRemote() {
124 WriteProcessMemory(proc_,
131 void CopyFromRemote() {
133 ReadProcessMemory(proc_,
140 enum EventRecordType {
142 EVENT_TYPE_THREADBEGIN = 1,
143 EVENT_TYPE_THREADNAME = 2,
144 EVENT_TYPE_EXCEPTION = 3,
145 EVENT_TYPE_PROCESSEXIT = 4,
146 EVENT_TYPE_CREATETHREAD = 5,
147 EVENT_TYPE_THREADEXIT = 6,
148 EVENT_TYPE_ALLOCHEAP = 7,
149 EVENT_TYPE_FREEHEAP = 8,
150 EVENT_TYPE_SYSCALL = 9,
151 EVENT_TYPE_ENTER_CS = 10,
152 EVENT_TYPE_TRYENTER_CS = 11,
153 EVENT_TYPE_LEAVE_CS = 12,
157 static const int kThreadNameBufSize = 64;
158 static const int kLdrBufSize = 512; // Looks like internal buffer is 512.
160 static const int kCodeBlockSize = 256;
162 static const int kOffLdrCode = 0 * kCodeBlockSize;
163 static const int kOffCreateThreadCode = 1 * kCodeBlockSize;
164 static const int kOffThreadCode = 2 * kCodeBlockSize;
165 static const int kOffExpCode = 3 * kCodeBlockSize;
166 static const int kOffExitCode = 4 * kCodeBlockSize;
167 static const int kOffThreadExitCode = 5 * kCodeBlockSize;
168 static const int kOffAllocHeapCode = 6 * kCodeBlockSize;
169 static const int kOffFreeHeapCode = 7 * kCodeBlockSize;
170 static const int kOffSyscallCode = 8 * kCodeBlockSize;
171 static const int kOffEnterCritSecCode = 9 * kCodeBlockSize;
172 static const int kOffTryEnterCritSecCode = 10 * kCodeBlockSize;
173 static const int kOffLeaveCritSecCode = 11 * kCodeBlockSize;
174 static const int kOffApcDispCode = 12 * kCodeBlockSize;
176 static const int kOffLogAreaPtr = 4096;
177 static const int kOffLogAreaData = 4096 + 4;
179 static const int kRecordHeaderSize = 8 + 4 + 4 + 4;
181 // Given the address to the start of a function, patch the function to jump
182 // to a given offset into the playground. This function will try to take
183 // advantage of hotpatch code, if the function is prefixed with 5 0x90 bytes.
184 // Returns a std::string of any assembly instructions that must be relocated,
185 // as they were overwritten during patching.
186 std::string PatchPreamble(int func_addr, int playground_off) {
187 sidestep::MiniDisassembler disas;
188 int stub_addr = reinterpret_cast<int>(remote_addr_ + playground_off);
193 if (ReadProcessMemory(proc_,
194 reinterpret_cast<void*>(func_addr - 5),
198 NOTREACHED("ReadProcessMemory(0x%08x) failed: %d",
199 func_addr - 5, GetLastError());
202 // TODO(deanm): It seems in more recent updates the compiler is generating
203 // complicated sequences for padding / alignment. For example:
204 // 00000000 8DA42400000000 lea esp,[esp+0x0]
205 // 00000007 8D4900 lea ecx,[ecx+0x0]
206 // is used for a 16 byte alignment. We need a better way of handling this.
207 if (memcmp(buf, "\x90\x90\x90\x90\x90", 5) == 0 ||
208 memcmp(buf, "\x00\x8D\x64\x24\x00", 5) == 0 ||
209 memcmp(buf, "\x00\x00\x8D\x49\x00", 5) == 0) {
210 unsigned int instr_bytes = 0;
212 // We might have a hotpatch no-op of mov edi, edi "\x8b\xff". It is a
213 // bit of a waste to relocate it, but it makes everything simpler.
215 while (instr_bytes < 2) {
216 if (disas.Disassemble(
217 reinterpret_cast<unsigned char*>(buf + 5 + instr_bytes),
218 &instr_bytes) != sidestep::IT_GENERIC) {
219 NOTREACHED("Could not disassemble or relocate instruction.");
221 // We only read 10 bytes worth of instructions.
222 CHECK(instr_bytes < 10);
225 instrs.assign(buf + 5, instr_bytes);
227 // We have a hotpatch prefix of 5 nop bytes. We can use this for our
228 // long jump, and then overwrite the first 2 bytes to jump back to there.
229 CodeBuffer patch(buf);
230 int off = stub_addr - func_addr;
232 patch.jmp_rel_short(-2 - 5);
234 // We need a full 5 bytes for the jump.
235 unsigned int instr_bytes = 0;
236 while (instr_bytes < 5) {
237 if (disas.Disassemble(
238 reinterpret_cast<unsigned char*>(buf + 5 + instr_bytes),
239 &instr_bytes) != sidestep::IT_GENERIC) {
240 NOTREACHED("Could not disassemble or relocate instruction.");
242 // We only read 10 bytes worth of instructions.
243 CHECK(instr_bytes < 10);
246 instrs.assign(buf + 5, instr_bytes);
248 // Overwrite the first 5 bytes with a relative jump to our stub.
249 CodeBuffer patch(buf + 5);
250 int off = stub_addr - (func_addr + 5);
254 // Write back the bytes, we are really probably writing more back than we
255 // need to, but it shouldn't really matter.
256 if (WriteProcessMemory(proc_,
257 reinterpret_cast<void*>(func_addr - 5),
261 NOTREACHED("WriteProcessMemory(0x%08x) failed: %d",
262 func_addr - 5, GetLastError());
268 std::string PatchPreamble(const char* func_name, int playground_off) {
269 return PatchPreamble(
270 reinterpret_cast<int>(resolver_.Resolve(func_name)), playground_off);
273 // Restore any instructions that needed to be moved to make space for our
274 // patch and jump back to the original code.
275 void ResumeOriginalFunction(const char* func_name,
276 const std::string& moved_instructions,
279 cb->emit_bytes(moved_instructions);
280 int off = resolver_.Resolve(func_name) +
281 moved_instructions.size() -
282 (remote_addr_ + stub_offset + cb->size() + 5);
286 // Makes a call to NtQueryPerformanceCounter, writing the timestamp to the
287 // buffer pointed to by EDI. EDI it not incremented. EAX is not preserved.
288 void AssembleQueryPerformanceCounter(CodeBuffer* cb) {
289 // Make a call to NtQueryPerformanceCounter and write the result into
290 // the log area. The buffer we write to should be aligned, but we should
291 // garantee that anyway for the logging area for performance.
292 cb->push_imm(0); // PerformanceFrequency
293 cb->push(EDI); // PerformanceCounter
294 cb->mov_imm(EAX, reinterpret_cast<int>(
295 resolver_.Resolve("ntdll!NtQueryPerformanceCounter")));
299 // This is the common log setup routine. It will allocate a new log entry,
300 // and write out the common log header to the event entry. The header is:
301 // is [ 64bit QPC ] [ 32bit cpu id ] [ 32bit thread id ] [ 32bit rec id ]
302 // EDI will be left pointing to the log entry, with |space| bytes left for
303 // type specific data. All other registers should not be clobbered.
304 void AssembleHeaderCode(CodeBuffer* cb, EventRecordType rt, int space) {
310 int unwind_depth = options_.stack_unwind_depth();
312 // Load EDI with the number of bytes we want for our log entry, this will
313 // be used in the atomic increment to allocate the log entry.
314 cb->mov_imm(EDI, kRecordHeaderSize + (unwind_depth * 4) + space);
315 // Do the increment and have EDI point to our log entry buffer space.
316 cb->mov_imm(EDX, reinterpret_cast<int>(remote_addr_ + kOffLogAreaPtr));
317 cb->inc_atomic(EDX, EDI);
318 // EDI is the buffer offset, make it a pointer to the record entry.
319 cb->add_imm(EDI, reinterpret_cast<int>(remote_addr_ + kOffLogAreaData));
321 AssembleQueryPerformanceCounter(cb);
330 // Stack unwinding, follow EBP to the maximum number of frames, and make
331 // sure that it stays on the stack (between ESP and TEB.StackBase).
332 if (unwind_depth > 0) {
333 cb->mov_imm(ECX, unwind_depth);
334 cb->fs(); cb->mov(EDX, Operand(0x04)); // get TEB.StackBase
339 Label unwind_loop, bail;
340 cb->bind(&unwind_loop);
342 // Bail if (EAX < ESP) (below the stack)
344 cb->jcc(below, &bail);
345 // Bail if (EAX >= EDX) (above the stack)
347 cb->jcc(above_equal, &bail);
349 // We have a valid stack pointer, it should point to something like:
350 // [ saved frame pointer ] [ return address ] [ arguments ... ]
352 cb->lodsd(); // Get the new stack pointer to follow in EAX
353 cb->movsd(); // Copy the return address to the log area.
355 cb->loop(&unwind_loop);
358 // If we did managed to unwind to the max, fill the rest with 0 (really
359 // we just want to inc EDI to the end, and this is an easy way).
360 cb->mov_imm(EAX, 0); // TODO use an xor
361 cb->rep(); cb->stosd();
364 // Store the type for this record entry.
365 cb->mov_imm(EAX, rt);
375 static const EventRecordType kRecordType = EVENT_TYPE_LDR;
376 static const char* kFuncName = "ntdll!DebugPrint";
377 static const int kStubOffset = kOffLdrCode;
379 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
381 // Set ShowSnaps to one to get the print routines to be called.
384 proc_, resolver_.Resolve("ntdll!ShowSnaps"), &enabled, 1, NULL);
386 CodeBuffer cb(buf_ + kStubOffset);
388 cb.pop(EDX); // return address
389 cb.pop(EAX); // First param in eax
394 cb.mov(ESI, EAX); // ESI points at the string structure.
396 // We used to do variable length based on the length supplied in the str
397 // structure, but it's easier (and sloppier) to just copy a fixed amount.
398 AssembleHeaderCode(&cb, kRecordType, kLdrBufSize);
400 cb.lodsd(); // Load the character count
401 cb.lodsd(); // Load the char*
403 cb.mov_imm(ECX, kLdrBufSize / 4); // load the char count as the rep count
404 cb.rep(); cb.movsb(); // Copy the string to the logging buffer
410 cb.pop(ECX); // don't care
411 cb.pop(ECX); // don't care
415 void PatchCreateThread() {
416 static const EventRecordType kRecordType = EVENT_TYPE_CREATETHREAD;
417 static const char* kFuncName =
418 options_.vista() ? "ntdll!NtCreateThreadEx" : "ntdll!NtCreateThread";
419 static const int kStubOffset = kOffCreateThreadCode;
421 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
423 CodeBuffer cb(buf_ + kStubOffset);
428 AssembleHeaderCode(&cb, kRecordType, 8);
430 cb.mov(EAX, Operand(ESP, 0x18 + 8));
432 // Super ugly hack. To coorrelate between creating a thread and the new
433 // thread running, we stash something to identify the creating event when
434 // we log the created event. We just use a pointer to the event log data
435 // since this will be unique and can tie the two events together. We pass
436 // it by writing into the context structure, so it will be passed in ESI.
437 cb.add_imm(EAX, 0xa0);
444 // Get and save CONTEXT.Eip
449 cb.stosd(); // Record the event identifier to tie together the events.
450 cb.movsd(); // write Eip to the log event
455 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
458 void PatchThreadBegin() {
459 static const EventRecordType kRecordType = EVENT_TYPE_THREADBEGIN;
460 static const char* kFuncName = "ntdll!CsrNewThread";
461 static const int kStubOffset = kOffThreadCode;
463 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
465 CodeBuffer cb(buf_ + kStubOffset);
469 AssembleHeaderCode(&cb, kRecordType, 8);
471 cb.mov(EAX, ESI); // We stashed the creator's eventid in the context ESI.
474 // TODO(deanm): The pointer is going to point into the CRT or something,
475 // should we dig deeper to get more information about the real entry?
476 cb.mov(EAX, Operand(EBP, 0x8));
480 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
483 void PatchThreadBeginVista() {
484 static const EventRecordType kRecordType = EVENT_TYPE_THREADBEGIN;
485 static const char* kFuncName = "ntdll!_RtlUserThreadStart";
486 static const int kStubOffset = kOffThreadCode;
488 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
490 CodeBuffer cb(buf_ + kStubOffset);
494 AssembleHeaderCode(&cb, kRecordType, 8);
496 cb.mov(EAX, ESI); // We stashed the creator's eventid in the context ESI.
499 // TODO(deanm): The pointer is going to point into the CRT or something,
500 // should we dig deeper to get more information about the real entry?
501 //cb.mov(EAX, Operand(EBP, 0x8));
506 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
509 // Intercept exception dispatching so we can catch when threads set a thread
510 // name (which is an exception with a special code). TODO it could be
511 // useful to log all exceptions.
512 void PatchSetThreadName() {
513 static const EventRecordType kRecordType = EVENT_TYPE_THREADNAME;
514 static const char* kFuncName = "ntdll!RtlDispatchException";
515 static const int kStubOffset = kOffExpCode;
517 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
519 CodeBuffer cb(buf_ + kStubOffset);
521 cb.pop(EDX); // return address
522 cb.pop(EAX); // ExceptionRecord
533 cb.cmp_imm(EAX, 0x406D1388);
534 cb.jcc(not_equal, &bail);
538 AssembleHeaderCode(&cb, kRecordType, kThreadNameBufSize);
540 // Fetch the second parameter.
541 for (int i = 0; i < 6; ++i) {
545 // TODO This is sloppy and we could run into unmapped memory...
547 cb.mov_imm(ECX, kThreadNameBufSize / 4);
548 cb.rep(); cb.movsd();
555 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
559 void PatchThreadExit() {
560 static const EventRecordType kRecordType = EVENT_TYPE_THREADEXIT;
561 static const char* kFuncName = "ntdll!LdrShutdownThread";
562 static const int kStubOffset = kOffThreadExitCode;
564 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
565 CodeBuffer cb(buf_ + kStubOffset);
568 AssembleHeaderCode(&cb, kRecordType, 0);
571 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
574 void PatchAllocateHeap() {
575 static const EventRecordType kRecordType = EVENT_TYPE_ALLOCHEAP;
576 static const char* kFuncName = "ntdll!RtlAllocateHeap";
577 static const int kStubOffset = kOffAllocHeapCode;
579 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
580 CodeBuffer cb(buf_ + kStubOffset);
585 AssembleHeaderCode(&cb, kRecordType, 12);
588 cb.add_imm(ESI, 12); // Skip over our saved and the return address
589 cb.movsd(); cb.movsd(); cb.movsd(); // Copy the 3 parameters
594 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
597 void PatchFreeHeap() {
598 static const EventRecordType kRecordType = EVENT_TYPE_FREEHEAP;
599 static const char* kFuncName = "ntdll!RtlFreeHeap";
600 static const int kStubOffset = kOffFreeHeapCode;
602 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
603 CodeBuffer cb(buf_ + kStubOffset);
608 AssembleHeaderCode(&cb, kRecordType, 12);
611 cb.add_imm(ESI, 12); // Skip over our saved and the return address
612 cb.movsd(); cb.movsd(); cb.movsd(); // Copy the 3 parameters
617 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
620 // Don't even bother going back to the original code, just implement our
621 // own KiFastSystemCall. The original looks like:
622 // .text:7C90EB8B mov edx, esp
623 // .text:7C90EB8D sysenter
624 // .text:7C90EB8F nop
625 // .text:7C90EB90 nop
626 // .text:7C90EB91 nop
627 // .text:7C90EB92 nop
628 // .text:7C90EB93 nop
629 void PatchSyscall() {
630 static const EventRecordType kRecordType = EVENT_TYPE_SYSCALL;
631 static const char* kFuncName = "ntdll!KiFastSystemCall";
632 static const int kStubOffset = kOffSyscallCode;
634 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
637 CodeBuffer cb(buf_ + kStubOffset);
641 // Skip 0xa5 which is QueryPerformanceCounter, to make sure we don't log
642 // our own logging's QPC. Disabled for now, using ret addr check...
643 // cb.cmp_imm(EAX, 0xa5);
644 // cb.jcc(equal, &skip);
646 // Check if the return address is from 0x6666 (our code region).
647 // 66817C24066666 cmp word [esp+0x6],0x6666
648 cb.emit(0x66); cb.emit(0x81); cb.emit(0x7C);
649 cb.emit(0x24); cb.emit(0x06); cb.emit(0x66); cb.emit(0x66);
650 cb.jcc(equal, &skip);
652 // This is all a bit shit. Originally I thought I could store some state
653 // on the stack above ESP, however, it seems that when APCs, etc are
654 // queued, they will use the stack above ESP. Well, not above ESP, above
655 // what was passed in as EDX into the systemcall, not matter if ESP was
656 // different than this :(. So we need to store our state in the event
657 // log record, and then we stick a pointer to that over a ret addr...
659 // Our stack starts like:
660 // [ ret addr ] [ ret addr 2 ] [ arguments ]
661 // We will update it to look like
662 // [ ret stub addr ] [ event entry ptr ] [ arguments ]
664 cb.push(EDI); // save EDI since we're using it
665 AssembleHeaderCode(&cb, kRecordType, 16 + 16 + 8);
666 cb.mov(EDX, EAX); // Save EAX...
667 cb.stosd(); // eax is the syscall number
669 cb.stosd(); // store the saved EDI
671 cb.stosd(); // store the real return address
673 cb.stosd(); // store the real (secondary) return address;
678 cb.movsd(); // argument 1
679 cb.movsd(); // argument 2
680 cb.movsd(); // argument 3
683 cb.push(EDI); // store our event ptr over the secondary ret addr.
684 cb.push_imm(reinterpret_cast<int>(remote_addr_ + kOffSyscallCode + 200));
685 cb.mov(EAX, EDX); // restore EAX
691 if (cb.size() > 200) {
692 NOTREACHED("code too big: %d", cb.size());
697 CodeBuffer cb(buf_ + kStubOffset + 200);
699 // TODO share the QPC code, this is a copy and paste...
701 cb.pop(EDI); // get the log area
703 cb.stosd(); // Log the system call return value.
705 // QPC will clobber EAX, and it's very important to save it since it
706 // is the return value from the system call. TODO validate if there is
707 // anything else we need to save...
709 AssembleQueryPerformanceCounter(&cb);
713 // - Restore the original "seconary" return address
714 // - Restore the original value of the EDI register
715 // - Jump control flow to the original return address
716 // All 3 of these values are stored in the log record...
717 // [ syscall num ] [ saved edi ] [ real rets ] [ args ] [ retval ] [ ts ]
718 // currently edi points here ----^
720 cb.push(Operand(EDI, -4 - 16)); // push the real 2nd ret
721 cb.push(Operand(EDI, -8 - 16)); // push the real ret
722 cb.push(Operand(EDI, -12 - 16)); // push the saved EDI
724 cb.pop(EDI); // restore EDI that was saved in the record
725 cb.ret(); // jmp back to the real ret ...
727 if (cb.size() > 56) {
733 // Patch lock (criticial section) holding.
734 void PatchEnterCriticalSection() {
735 static const EventRecordType kRecordType = EVENT_TYPE_ENTER_CS;
736 static const char* kFuncName = "ntdll!RtlEnterCriticalSection";
737 static const int kStubOffset = kOffEnterCritSecCode;
739 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
741 // We just want to capture the return address and original argument, so
742 // we know when EnterCriticalSection returned, we don't want to know when
743 // it entered because it could sit waiting. We want to know when the lock
744 // actually started being held. The compiler will sometimes generated code
745 // that overwrites arguments, so we'll keep a copy of the argument just in
746 // case code like this is ever generated in the future. TODO is it enough
747 // to just assume a LPCRITICAL_SECTION uniquely identifies the lock, or
748 // can the same lock have multiple different copies, I would assume not.
750 CodeBuffer cb(buf_ + kStubOffset);
752 // Set up an additional frame so that we capture the return.
753 // TODO use memory instructions instead of using registers.
754 cb.pop(EAX); // return address
755 cb.pop(EDX); // first argument (critical section pointer)
761 reinterpret_cast<int>(remote_addr_ + kStubOffset + 40));
763 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
764 CHECK(cb.size() < 40);
768 CodeBuffer cb(buf_ + kStubOffset + 40);
775 AssembleHeaderCode(&cb, kRecordType, 4);
777 cb.lodsd(); // Skip over our saved ESI
778 cb.lodsd(); // Skip over the return address
779 cb.movsd(); // Write the CRITICAL_SECTION* to the event record.
789 void PatchTryEnterCriticalSection() {
790 static const EventRecordType kRecordType = EVENT_TYPE_TRYENTER_CS;
791 static const char* kFuncName = "ntdll!RtlTryEnterCriticalSection";
792 static const int kStubOffset = kOffTryEnterCritSecCode;
794 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
797 CodeBuffer cb(buf_ + kStubOffset);
799 // Set up an additional frame so that we capture the return.
800 // TODO use memory instructions instead of using registers.
801 cb.pop(EAX); // return address
802 cb.pop(EDX); // first argument (critical section pointer)
807 cb.push_imm(reinterpret_cast<int>(remote_addr_ + kStubOffset + 40));
809 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
810 CHECK(cb.size() < 40);
814 CodeBuffer cb(buf_ + kStubOffset + 40);
822 AssembleHeaderCode(&cb, kRecordType, 8);
824 cb.lodsd(); // Skip over our saved ESI
825 cb.lodsd(); // Skip over the return address
826 cb.movsd(); // Write the CRITICAL_SECTION* to the event record.
829 cb.stosd(); // Write the return value to the event record.
838 void PatchLeaveCriticalSection() {
839 static const EventRecordType kRecordType = EVENT_TYPE_LEAVE_CS;
840 static const char* kFuncName = "ntdll!RtlLeaveCriticalSection";
841 static const int kStubOffset = kOffLeaveCritSecCode;
843 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
844 CodeBuffer cb(buf_ + kStubOffset);
846 // TODO use memory instructions instead of using registers.
847 cb.pop(EDX); // return address
848 cb.pop(EAX); // first argument (critical section pointer)
853 AssembleHeaderCode(&cb, kRecordType, 4);
854 cb.stosd(); // Write the CRITICAL_SECTION* to the event record.
857 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
860 // Patch APC dispatching. This is a bit hacky, since the return to kernel
861 // mode is done with NtContinue, we have to shim in a stub return address to
862 // catch when the callback is finished. It is probably a bit fragile.
863 void PatchApcDispatcher() {
864 static const EventRecordType kRecordType = EVENT_TYPE_APC;
865 static const char* kFuncName = "ntdll!KiUserApcDispatcher";
866 static const int kStubOffset = kOffApcDispCode;
868 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
871 CodeBuffer cb(buf_ + kStubOffset);
873 // We don't really need to preserve these since we're the first thing
874 // executing from the kernel dispatch, but yeah, it is good practice.
878 AssembleHeaderCode(&cb, kRecordType, 4 + 4 + 8);
880 cb.mov_imm(EAX, reinterpret_cast<int>(remote_addr_ + kStubOffset + 140));
881 cb.xchg(EAX, Operand(ESP, 8)); // Swap the callback address with ours.
882 cb.stosd(); // Store the original callback function address.
884 // TODO for now we're lazy and depend that ESI will be preserved, and we
885 // use it to store the pointer into our log record. EDI isn't preserved.
891 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
893 CHECK(cb.size() < 140);
896 CodeBuffer cb(buf_ + kStubOffset + 140);
898 // This is our shim, we need to call the original callback function, then
899 // we can catch the return and log when it was completed.
900 cb.pop(EAX); // The real return address, safe to use EAX w/ the ABI?
904 cb.stosd(); // Store the real return address, we'll need it.
907 cb.lodsd(); // Load the real callback address.
912 cb.call(EAX); // Call the original callback address.
918 AssembleQueryPerformanceCounter(&cb);
923 cb.push(Operand(ESI, -4)); // Push the real return address.
924 cb.ret(); // Return back to the APC Dispatcher.
926 CHECK(cb.size() < 50);
930 // We need to hook into process shutdown for two reasons. Most importantly,
931 // we need to copy the playground back from the process before the address
932 // space goes away. We could avoid this with shared memory, however, there
933 // is a reason two. In order to capture symbols for all of the libraries
934 // loaded into arbitrary applications, on shutdown we do an instrusive load
935 // of symbols into the traced process.
937 // ntdll!LdrShutdownProcess
938 // - NtSetEvent(event, 0);
939 // - NtWaitForSingleObject(event, FALSE, NULL);
941 void PatchExit(HANDLE exiting, HANDLE exited) {
942 static const EventRecordType kRecordType = EVENT_TYPE_PROCESSEXIT;
943 static const char* kFuncName = "ntdll!LdrShutdownProcess";
944 static const int kStubOffset = kOffExitCode;
946 HANDLE rexiting, rexited;
947 if (!DuplicateHandle(::GetCurrentProcess(),
953 DUPLICATE_SAME_ACCESS)) {
956 if (!DuplicateHandle(::GetCurrentProcess(),
962 DUPLICATE_SAME_ACCESS)) {
966 std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
967 CodeBuffer cb(buf_ + kStubOffset);
970 AssembleHeaderCode(&cb, kRecordType, 0);
973 // NtSetEvent(exiting, 0);
975 cb.push_imm(reinterpret_cast<int>(rexiting));
976 cb.mov_imm(EAX, reinterpret_cast<int>(
977 resolver_.Resolve("ntdll!NtSetEvent")));
980 // NtWaitForSingleObject(exited, FALSE, INFINITE);
983 cb.push_imm(reinterpret_cast<int>(rexited));
984 cb.mov_imm(EAX, reinterpret_cast<int>(
985 resolver_.Resolve("ntdll!NtWaitForSingleObject")));
988 ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
993 if (options_.vista()) {
994 // TODO(deanm): Make PatchCreateThread work on Vista.
995 PatchThreadBeginVista();
1002 PatchSetThreadName();
1005 PatchApcDispatcher();
1007 // The loader logging needs to be improved a bit to really be useful.
1010 // These are interesting, but will collect a ton of data:
1011 if (options_.log_heap()) {
1012 PatchAllocateHeap();
1015 if (options_.log_lock()) {
1016 PatchEnterCriticalSection();
1017 PatchTryEnterCriticalSection();
1018 PatchLeaveCriticalSection();
1022 // Dump the event records from the playground to stdout in a JSON format.
1023 // TODO: Drop RDTSCNormalizer, it was from old code that tried to use the
1024 // rdtsc counters from the CPU, and this required a bunch of normalization
1025 // to account for non-syncronized timestamps across different cores, etc.
1026 void DumpJSON(RDTSCNormalizer* rdn, SymResolver* res) {
1027 int pos = kOffLogAreaPtr;
1031 std::map<int, const char*> syscalls = CreateSyscallMap();
1033 printf("parseEvents([\n");
1034 for (int end = pos + i; pos < end; ) {
1036 __int64 ts = Int64At(pos);
1038 void* cpuid = reinterpret_cast<void*>(IntAt(pos));
1040 printf("'ms': %f,\n", rdn->MsFromStart(cpuid, ts));
1042 printf("'cpu': 0x%x,\n'thread': 0x%x,\n", cpuid, IntAt(pos));
1045 if (options_.stack_unwind_depth() > 0) {
1046 printf("'stacktrace': [\n");
1047 for (int i = 0; i < options_.stack_unwind_depth(); ++i) {
1048 int retaddr = IntAt(pos + (i * 4));
1051 printf(" [ 0x%x, %s ],\n",
1053 res ? JSONString(res->Unresolve(retaddr)).c_str() : "\"\"");
1056 pos += (options_.stack_unwind_depth() * 4);
1060 EventRecordType rt = static_cast<EventRecordType>(IntAt(pos));
1064 case EVENT_TYPE_LDR:
1066 printf("'eventtype': 'EVENT_TYPE_LDR',\n");
1067 std::string str(&buf_[pos], kLdrBufSize);
1068 str = str.substr(0, str.find('\0'));
1069 printf("'ldrinfo': %s,\n", JSONString(str).c_str());
1073 case EVENT_TYPE_CREATETHREAD:
1075 printf("'eventtype': 'EVENT_TYPE_CREATETHREAD',\n"
1076 "'eventid': 0x%x,\n"
1077 "'startaddr': 0x%x,\n",
1078 IntAt(pos), IntAt(pos+4));
1082 case EVENT_TYPE_THREADBEGIN:
1084 printf("'eventtype': 'EVENT_TYPE_THREADBEGIN',\n"
1085 "'parenteventid': 0x%x,\n"
1086 "'startaddr': 0x%x,\n",
1087 IntAt(pos), IntAt(pos+4));
1091 case EVENT_TYPE_THREADNAME:
1093 std::string str(&buf_[pos], kThreadNameBufSize);
1094 str = str.substr(0, str.find('\0'));
1095 printf("'eventtype': 'EVENT_TYPE_THREADNAME',\n"
1096 "'threadname': %s,\n",
1097 JSONString(str).c_str());
1098 pos += kThreadNameBufSize;
1101 case EVENT_TYPE_PROCESSEXIT:
1103 printf("'eventtype': 'EVENT_TYPE_PROCESSEXIT',\n");
1106 case EVENT_TYPE_THREADEXIT:
1108 printf("'eventtype': 'EVENT_TYPE_THREADEXIT',\n");
1111 case EVENT_TYPE_ALLOCHEAP:
1113 printf("'eventtype': 'EVENT_TYPE_ALLOCHEAP',\n"
1114 "'heaphandle': 0x%x,\n"
1115 "'heapflags': 0x%x,\n"
1116 "'heapsize': %d,\n",
1117 IntAt(pos), IntAt(pos+4), IntAt(pos+8));
1121 case EVENT_TYPE_FREEHEAP:
1123 printf("'eventtype': 'EVENT_TYPE_FREEHEAP',\n"
1124 "'heaphandle': 0x%x,\n"
1125 "'heapflags': 0x%x,\n"
1127 IntAt(pos), IntAt(pos+4), IntAt(pos+8));
1131 case EVENT_TYPE_SYSCALL:
1133 int syscall = IntAt(pos);
1134 printf("'eventtype': 'EVENT_TYPE_SYSCALL',\n"
1135 "'syscall': 0x%x,\n", syscall);
1138 printf("'syscallargs': [\n");
1139 for (int i = 0; i < 3; ++i) {
1140 printf(" 0x%x,\n", IntAt(pos));
1145 printf("'retval': 0x%x,\n"
1147 IntAt(pos), rdn->MsFromStart(0, Int64At(pos+4)));
1150 if (syscalls.count(syscall) == 1) {
1151 std::string sname = syscalls[syscall];
1152 printf("'syscallname': %s,\n",
1153 JSONString(sname).c_str());
1154 // Mark system calls that we should consider "waiting" system
1155 // calls, where we are not actually active.
1156 if (sname.find("WaitFor") != std::string::npos ||
1157 sname.find("RemoveIoCompletion") != std::string::npos) {
1158 printf("'waiting': 1,\n");
1163 case EVENT_TYPE_ENTER_CS:
1165 printf("'eventtype': 'EVENT_TYPE_ENTER_CS',\n"
1166 "'critical_section': 0x%x,\n", IntAt(pos));
1170 case EVENT_TYPE_TRYENTER_CS:
1172 printf("'eventtype': 'EVENT_TYPE_TRYENTER_CS',\n"
1173 "'critical_section': 0x%x,\n"
1174 "'retval': 0x%x,\n",
1175 IntAt(pos), IntAt(pos+4));
1179 case EVENT_TYPE_LEAVE_CS:
1181 printf("'eventtype': 'EVENT_TYPE_LEAVE_CS',\n"
1182 "'critical_section': 0x%x,\n", IntAt(pos));
1186 case EVENT_TYPE_APC:
1188 int func_addr = IntAt(pos);
1189 printf("'eventtype': 'EVENT_TYPE_APC',\n"
1190 "'func_addr': 0x%x,\n"
1191 "'func_addr_name': %s,\n"
1192 "'ret_addr': 0x%x,\n"
1195 res ? JSONString(res->Unresolve(func_addr)).c_str() : "\"\"",
1196 IntAt(pos+4), rdn->MsFromStart(0, Int64At(pos+8)));
1201 NOTREACHED("Unknown event type: %d", rt);
1209 int IntAt(int pos) { return *reinterpret_cast<int*>(&buf_[pos]); }
1210 __int64 Int64At(int pos) { return *reinterpret_cast<__int64*>(&buf_[pos]); }
1214 // Handle the process we install into or read back from.
1216 // The address where we will keep our playground in the remote process.
1218 // Lookup addresses from symbol names for ntdll.dll.
1219 SymResolver resolver_;
1221 // A local copy of the playground data, we copy it into the remote process.
1222 char buf_[kPlaygroundSize];
1226 int main(int argc, char** argv) {
1227 std::string command_line;
1228 bool use_symbols = false;
1229 bool attaching = false;
1230 bool launched = false;
1231 bool manual_quit = false;
1233 Playground::Options options;
1235 PROCESS_INFORMATION info = {0};
1240 if (std::string("--symbols") == argv[0]) {
1242 } else if (std::string("--vista") == argv[0]) {
1243 options.set_vista(true);
1244 } else if (std::string("--log-heap") == argv[0]) {
1245 options.set_log_heap(true);
1246 } else if (std::string("--log-lock") == argv[0]) {
1247 options.set_log_lock(true);
1248 } else if (std::string("--manual-quit") == argv[0]) {
1250 } else if (argc >= 2 && std::string("--unwind") == argv[0]) {
1251 options.set_stack_unwind_depth(atoi(argv[1]));
1253 } else if (argc >= 2 && !launched && std::string("--attach") == argv[0]) {
1255 info.hProcess = OpenProcess(PROCESS_ALL_ACCESS, FALSE, atoi(argv[1]));
1258 } else if (!launched) {
1259 STARTUPINFOA start_info = {0};
1260 start_info.cb = sizeof(start_info);
1262 if (!CreateProcessA(NULL,
1272 NOTREACHED("Failed to launch \"%s\": %d\n", argv[0], GetLastError());
1277 NOTREACHED("error parsing command line.");
1283 printf("usage: traceline.exe \"app.exe my arguments\"\n"
1284 " --attach 123: buggy support for attaching to a process\n"
1285 " --unwind 16: unwind the stack to the specified max depth\n"
1286 " --symbols: use symbols for stacktraces\n"
1287 " --log-heap: log heap operations (alloc / free).\n"
1288 " --log-lock: log lock (critical section) operations.\n");
1293 HANDLE exiting = CreateEvent(NULL, FALSE, FALSE, NULL);
1294 HANDLE exited = CreateEvent(NULL, FALSE, FALSE, NULL);
1296 // The playground object is big (32MB), dynamically alloc.
1297 Playground* pg = new Playground(info.hProcess, options);
1299 pg->AllocateInRemote();
1301 pg->PatchExit(exiting, exited);
1304 RDTSCNormalizer rdn;
1308 ResumeThread(info.hThread);
1310 // Wait until we have been notified that it's exiting.
1312 fprintf(stderr, "Press enter when you want stop tracing and collect.\n");
1316 HANDLE whs[] = {exiting, info.hProcess};
1317 if (WaitForMultipleObjects(2, whs, FALSE, INFINITE) != WAIT_OBJECT_0) {
1318 NOTREACHED("Failed to correctly capture process shutdown.");
1322 pg->CopyFromRemote();
1325 // Break in and get the symbols...
1326 SymResolver res(NULL, info.hProcess);
1327 pg->DumpJSON(&rdn, &res);
1329 pg->DumpJSON(&rdn, NULL);
1332 // Notify that it can exit now, we are done.
1335 CloseHandle(info.hProcess);
1336 CloseHandle(info.hThread);