From: Simon Pilgrim Date: Wed, 3 Oct 2018 19:02:38 +0000 (+0000) Subject: [X86] PUSH/POP 'mem-mem' instructions are not RMW - these are 2 different addresses X-Git-Tag: llvmorg-8.0.0-rc1~7306 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=aabd99c27aa5b0d4f2da884678699eebc757c700;p=platform%2Fupstream%2Fllvm.git [X86] PUSH/POP 'mem-mem' instructions are not RMW - these are 2 different addresses. This patch adds a 'WriteCopy' [WriteLoad, WriteStore] schedule sequence instead, to better model the behaviour. Found by @andreadb during llvm-mca testing on btver2, which was crashing on "zero uop" WriteRMW-only instructions. llvm-svn: 343708 --- diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 160401c..053c07a 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -1210,12 +1210,12 @@ def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>, OpSize32, Requires<[Not64BitMode]>, NotMemoryFoldable; } // isCodeGenOnly = 1, ForceDisassemble = 1 } // mayLoad, SchedRW -let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in { +let mayStore = 1, mayLoad = 1, SchedRW = [WriteCopy] in { def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", []>, OpSize16; def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", []>, OpSize32, Requires<[Not64BitMode]>; -} // mayStore, mayLoad, WriteRMW +} // mayStore, mayLoad, SchedRW let mayStore = 1, SchedRW = [WriteStore] in { def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>, @@ -1243,7 +1243,7 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), Requires<[Not64BitMode]>; } // mayStore, SchedRW -let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in { def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src", []>, OpSize16; def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", 
[]>, @@ -1302,7 +1302,7 @@ def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>, OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable; } // isCodeGenOnly = 1, ForceDisassemble = 1 } // mayLoad, SchedRW -let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in +let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>, OpSize32, Requires<[In64BitMode]>; let mayStore = 1, SchedRW = [WriteStore] in { @@ -1314,7 +1314,7 @@ def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>, OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable; } // isCodeGenOnly = 1, ForceDisassemble = 1 } // mayStore, SchedRW -let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in { def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>, OpSize32, Requires<[In64BitMode]>; } // mayLoad, mayStore, SchedRW diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 4088422..39df5e7 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -107,6 +107,7 @@ def WriteLoad : SchedWrite; def WriteStore : SchedWrite; def WriteStoreNT : SchedWrite; def WriteMove : SchedWrite; +def WriteCopy : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy // Arithmetic. defm WriteALU : X86SchedWritePair; // Simple integer ALU op. 
diff --git a/llvm/test/CodeGen/X86/schedule-x86_32.ll b/llvm/test/CodeGen/X86/schedule-x86_32.ll index 6df7836..873d6a6 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_32.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_32.ll @@ -1676,9 +1676,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP ; SLM-NEXT: popw %ax # sched: [3:1.00] -; SLM-NEXT: popw (%ecx) # sched: [1:1.00] +; SLM-NEXT: popw (%ecx) # sched: [4:2.00] ; SLM-NEXT: pushw %ax # sched: [1:1.00] -; SLM-NEXT: pushw (%ecx) # sched: [1:1.00] +; SLM-NEXT: pushw (%ecx) # sched: [4:2.00] ; SLM-NEXT: pushw $4095 # imm = 0xFFF ; SLM-NEXT: # sched: [1:1.00] ; SLM-NEXT: pushw $7 # sched: [1:1.00] @@ -1766,9 +1766,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP ; BTVER2-NEXT: popw %ax # sched: [5:1.00] -; BTVER2-NEXT: popw (%ecx) # sched: [1:1.00] +; BTVER2-NEXT: popw (%ecx) # sched: [6:1.00] ; BTVER2-NEXT: pushw %ax # sched: [1:1.00] -; BTVER2-NEXT: pushw (%ecx) # sched: [1:1.00] +; BTVER2-NEXT: pushw (%ecx) # sched: [6:1.00] ; BTVER2-NEXT: pushw $4095 # imm = 0xFFF ; BTVER2-NEXT: # sched: [1:1.00] ; BTVER2-NEXT: pushw $7 # sched: [1:1.00] @@ -1828,9 +1828,9 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize { ; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] ; SLM-NEXT: #APP ; SLM-NEXT: popl %eax # sched: [3:1.00] -; SLM-NEXT: popl (%ecx) # sched: [1:1.00] +; SLM-NEXT: popl (%ecx) # sched: [4:2.00] ; SLM-NEXT: pushl %eax # sched: [1:1.00] -; SLM-NEXT: pushl (%ecx) # sched: [1:1.00] +; SLM-NEXT: pushl (%ecx) # sched: [4:2.00] ; SLM-NEXT: pushl $4095 # imm = 0xFFF ; SLM-NEXT: # sched: [1:1.00] ; SLM-NEXT: pushl $7 # sched: [1:1.00] @@ -1918,9 +1918,9 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize { ; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00] ; BTVER2-NEXT: #APP ; BTVER2-NEXT: popl %eax # sched: 
[5:1.00] -; BTVER2-NEXT: popl (%ecx) # sched: [1:1.00] +; BTVER2-NEXT: popl (%ecx) # sched: [6:1.00] ; BTVER2-NEXT: pushl %eax # sched: [1:1.00] -; BTVER2-NEXT: pushl (%ecx) # sched: [1:1.00] +; BTVER2-NEXT: pushl (%ecx) # sched: [6:1.00] ; BTVER2-NEXT: pushl $4095 # imm = 0xFFF ; BTVER2-NEXT: # sched: [1:1.00] ; BTVER2-NEXT: pushl $7 # sched: [1:1.00] @@ -1933,7 +1933,7 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize { ; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] ; ZNVER1-NEXT: #APP ; ZNVER1-NEXT: popl %eax # sched: [8:0.50] -; ZNVER1-NEXT: popl (%ecx) # sched: [1:0.50] +; ZNVER1-NEXT: popl (%ecx) # sched: [9:1.00] ; ZNVER1-NEXT: pushl %eax # sched: [1:0.50] ; ZNVER1-NEXT: pushl (%ecx) # sched: [4:0.50] ; ZNVER1-NEXT: pushl $4095 # imm = 0xFFF diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll index 78b8950..e903ff5 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_64.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll @@ -9648,9 +9648,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize { ; SLM: # %bb.0: ; SLM-NEXT: #APP ; SLM-NEXT: popw %ax # sched: [3:1.00] -; SLM-NEXT: popw (%rsi) # sched: [1:1.00] +; SLM-NEXT: popw (%rsi) # sched: [4:2.00] ; SLM-NEXT: pushw %di # sched: [1:1.00] -; SLM-NEXT: pushw (%rsi) # sched: [1:1.00] +; SLM-NEXT: pushw (%rsi) # sched: [4:2.00] ; SLM-NEXT: pushw $4095 # imm = 0xFFF ; SLM-NEXT: # sched: [1:1.00] ; SLM-NEXT: pushw $7 # sched: [1:1.00] @@ -9726,9 +9726,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP ; BTVER2-NEXT: popw %ax # sched: [5:1.00] -; BTVER2-NEXT: popw (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: popw (%rsi) # sched: [6:1.00] ; BTVER2-NEXT: pushw %di # sched: [1:1.00] -; BTVER2-NEXT: pushw (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: pushw (%rsi) # sched: [6:1.00] ; BTVER2-NEXT: pushw $4095 # imm = 0xFFF ; BTVER2-NEXT: # sched: [1:1.00] ; BTVER2-NEXT: pushw $7 # sched: [1:1.00] @@ -9781,9 
+9781,9 @@ define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize { ; SLM: # %bb.0: ; SLM-NEXT: #APP ; SLM-NEXT: popq %rax # sched: [3:1.00] -; SLM-NEXT: popq (%rsi) # sched: [1:1.00] +; SLM-NEXT: popq (%rsi) # sched: [4:2.00] ; SLM-NEXT: pushq %rdi # sched: [1:1.00] -; SLM-NEXT: pushq (%rsi) # sched: [1:1.00] +; SLM-NEXT: pushq (%rsi) # sched: [4:2.00] ; SLM-NEXT: pushq $4095 # imm = 0xFFF ; SLM-NEXT: # sched: [1:1.00] ; SLM-NEXT: pushq $7 # sched: [1:1.00] @@ -9859,9 +9859,9 @@ define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP ; BTVER2-NEXT: popq %rax # sched: [5:1.00] -; BTVER2-NEXT: popq (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: popq (%rsi) # sched: [6:1.00] ; BTVER2-NEXT: pushq %rdi # sched: [1:1.00] -; BTVER2-NEXT: pushq (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: pushq (%rsi) # sched: [6:1.00] ; BTVER2-NEXT: pushq $4095 # imm = 0xFFF ; BTVER2-NEXT: # sched: [1:1.00] ; BTVER2-NEXT: pushq $7 # sched: [1:1.00] @@ -9872,9 +9872,9 @@ define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize { ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: #APP ; ZNVER1-NEXT: popq %rax # sched: [8:0.50] -; ZNVER1-NEXT: popq (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: popq (%rsi) # sched: [9:1.00] ; ZNVER1-NEXT: pushq %rdi # sched: [1:0.50] -; ZNVER1-NEXT: pushq (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: pushq (%rsi) # sched: [9:1.00] ; ZNVER1-NEXT: pushq $4095 # imm = 0xFFF ; ZNVER1-NEXT: # sched: [1:0.50] ; ZNVER1-NEXT: pushq $7 # sched: [1:0.50]