/// target cpu. 15-bytes is the longest single NOP instruction, but some
/// platforms can't decode the longest forms efficiently.
static unsigned maxLongNopLength(const X86Subtarget *Subtarget) {
- uint64_t MaxNopLength = 10;
if (Subtarget->getFeatureBits()[X86::ProcIntelSLM])
- MaxNopLength = 7;
- else if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP])
- MaxNopLength = 15;
- else if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP])
- MaxNopLength = 11;
- return MaxNopLength;
+ return 7;
+ if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP])
+ return 15;
+ if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP])
+ return 11;
+ if (Subtarget->getFeatureBits()[X86::FeatureNOPL] || Subtarget->is64Bit())
+ return 10;
+ if (Subtarget->is32Bit())
+ return 2;
+ return 1;
}
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
const X86Subtarget *Subtarget) {
- if (!Subtarget->is64Bit()) {
- // TODO Do additional checking if the CPU supports multi-byte nops.
- OS.emitInstruction(MCInstBuilder(X86::NOOP), *Subtarget);
- return 1;
- }
-
// Cap a single nop emission at the profitable value for the target
NumBytes = std::min(NumBytes, maxLongNopLength(Subtarget));
CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
if (Code.size() < MinSize) {
- if (MinSize == 2 && Opcode == X86::PUSH64r) {
+ if (MinSize == 2 && Subtarget->is32Bit() &&
+ Subtarget->isTargetWindowsMSVC() &&
+ (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
+ // For compatibility reasons, when targeting MSVC, it is important to
+ // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some tools
+ // rely specifically on this pattern to be able to patch a function.
+ // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
+ *Subtarget);
+ } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
// This is an optimization that lets us get away without emitting a nop in
// many cases.
//
define void @f2() "patchable-function-entry"="2" {
; CHECK-LABEL: f2:
; CHECK-NEXT: .Lfunc_begin2:
-; 32-COUNT-2: nop
+; 32: xchgw %ax, %ax
; 64: xchgw %ax, %ax
; CHECK-NEXT: ret
; CHECK: .section __patchable_function_entries,"awo",@progbits,f2{{$}}
define void @f3() "patchable-function-entry"="3" comdat {
; CHECK-LABEL: f3:
; CHECK-NEXT: .Lfunc_begin3:
-; 32-COUNT-3: nop
+; 32: xchgw %ax, %ax
+; 32-NEXT: nop
; 64: nopl (%rax)
; CHECK: ret
; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f3,comdat,f3{{$}}
define void @f5() "patchable-function-entry"="5" comdat {
; CHECK-LABEL: f5:
; CHECK-NEXT: .Lfunc_begin4:
-; 32-COUNT-5: nop
+; 32-COUNT-2: xchgw %ax, %ax
+; 32-NEXT: nop
; 64: nopl 8(%rax,%rax)
; CHECK-NEXT: ret
; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f5,comdat,f5{{$}}
; RUN: llc -verify-machineinstrs -filetype=obj -o - -mtriple=x86_64-apple-macosx < %s | llvm-objdump --triple=x86_64-apple-macosx -d - | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx < %s | FileCheck %s --check-prefix=CHECK-ALIGN
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386 < %s | FileCheck %s --check-prefixes=32,32CFI,XCHG
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc < %s | FileCheck %s --check-prefixes=32,MOV
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium3 < %s | FileCheck %s --check-prefixes=32,MOV
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium4 < %s | FileCheck %s --check-prefixes=32,XCHG
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=64
+; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-unknown-linux-code16 < %s | FileCheck %s --check-prefix=16
+
+; 16-NOT: movl %edi, %edi
+; 16-NOT: xchgw %ax, %ax
declare void @callee(i64*)
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _f0:
+; 32: f0:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
+; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
+; 32-NEXT: retl
+
+; 64: f0:
+; 64-NEXT: # %bb.0:
+; 64-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
+; 64-NEXT: retq
+
ret void
}
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _f1:
+
+; 32: f1:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
+; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
+; 32-NEXT: pushl %ebp
+
+; 64: f1:
+; 64-NEXT: .seh_proc f1
+; 64-NEXT: # %bb.0:
+; 64-NEXT: pushq %rbp
+
ret void
}
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _f2:
+
+; 32: f2:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
+; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
+; 32-NEXT: pushl %ebp
+
+; 64: f2:
+; 64-NEXT: .seh_proc f2
+; 64-NEXT: # %bb.0:
+; 64-NEXT: subq $200, %rsp
+
%ptr = alloca i64, i32 20
call void @callee(i64* %ptr)
ret void
; CHECK-ALIGN: .p2align 4, 0x90
; CHECK-ALIGN: _f3:
+
+; 32: f3:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax
+; MOV-NEXT: movl %edi, %edi
+; 32-NEXT: retl
+
+; 64: f3:
+; 64-NEXT: # %bb.0:
+; 64-NEXT: xchgw %ax, %ax
+; 64-NEXT: retq
+
ret void
}
; patchable one.
; CHECK-LABEL: f4{{>?}}:
; CHECK-NEXT: 8b 0c 37 movl (%rdi,%rsi), %ecx
+; 32: f4:
+; 32CFI-NEXT: .cfi_startproc
+; 32-NEXT: # %bb.0:
+; XCHG-NEXT: xchgw %ax, %ax
+; MOV-NEXT: movl %edi, %edi
+; 32-NEXT: pushl %ebx
+
+; 64: f4:
+; 64-NEXT: # %bb.0:
+; 64-NOT: xchgw %ax, %ax
+
define i32 @f4(i8* %arg1, i64 %arg2, i32 %arg3) "patchable-function"="prologue-short-redirect" {
bb:
%tmp10 = getelementptr i8, i8* %arg1, i64 %arg2