From: Craig Topper
Date: Mon, 29 Mar 2021 16:54:26 +0000 (-0700)
Subject: [X86] Always use rip-relative addressing on 64-bit when rematerializing all zeros...
X-Git-Tag: llvmorg-14-init~11012
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=54bacaf31127ee9d19e8df7ad7de5c94a4fc7c62;p=platform%2Fupstream%2Fllvm.git

[X86] Always use rip-relative addressing on 64-bit when rematerializing all zeros/ones registers using a folded load.

Previously we only used RIP-relative addressing when PIC was enabled.
But we know we're in the small or kernel code model here, so we can
always use RIP-relative addressing, which gives a smaller encoding.

Here's a godbolt link that demonstrates the current codegen:
https://godbolt.org/z/j3158o
Note that in the non-PIC version the load from .LCPI0_0 doesn't use
RIP-relative addressing, but if you change the constant in the source
from 0.0 to 1.0 it becomes RIP-relative.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D97208
---

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 1354befb..1334d10 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6085,15 +6085,16 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
 
     // x86-32 PIC requires a PIC base register for constant pools.
     unsigned PICBase = 0;
-    if (MF.getTarget().isPositionIndependent()) {
-      if (Subtarget.is64Bit())
-        PICBase = X86::RIP;
-      else
-        // FIXME: PICBase = getGlobalBaseReg(&MF);
-        // This doesn't work for several reasons.
-        // 1. GlobalBaseReg may have been spilled.
-        // 2. It may not be live at MI.
-        return nullptr;
+    // Since we're using Small or Kernel code model, we can always use
+    // RIP-relative addressing for a smaller encoding.
+    if (Subtarget.is64Bit()) {
+      PICBase = X86::RIP;
+    } else if (MF.getTarget().isPositionIndependent()) {
+      // FIXME: PICBase = getGlobalBaseReg(&MF);
+      // This doesn't work for several reasons.
+      // 1. GlobalBaseReg may have been spilled.
+      // 2. It may not be live at MI.
+      return nullptr;
     }
 
     // Create a constant-pool entry.
diff --git a/llvm/test/CodeGen/X86/avx-cmp.ll b/llvm/test/CodeGen/X86/avx-cmp.ll
index e22ebc8..621ea2c 100644
--- a/llvm/test/CodeGen/X86/avx-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx-cmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_rip
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
 
 define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind {
@@ -49,7 +49,7 @@ define void @render(double %a0) nounwind {
 ; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
 ; CHECK-NEXT:    vmovsd (%rsp), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
+; CHECK-NEXT:    vucomisd {{\.LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    jne .LBB2_5
 ; CHECK-NEXT:    jnp .LBB2_2
 ; CHECK-NEXT:  .LBB2_5: # %if.then
diff --git a/llvm/test/CodeGen/X86/mmx-fold-zero.ll b/llvm/test/CodeGen/X86/mmx-fold-zero.ll
index 7f7d716..ad918b3 100644
--- a/llvm/test/CodeGen/X86/mmx-fold-zero.ll
+++ b/llvm/test/CodeGen/X86/mmx-fold-zero.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_rip
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64
 
@@ -70,7 +70,7 @@ define double @mmx_zero(double, double, double, double) nounwind {
 ; X64-NEXT:    paddw %mm2, %mm0
 ; X64-NEXT:    paddw %mm6, %mm0
 ; X64-NEXT:    pmuludq %mm3, %mm0
-; X64-NEXT:    paddw {{\.LCPI[0-9]+_[0-9]+}}, %mm0
+; X64-NEXT:    paddw {{\.LCPI[0-9]+_[0-9]+}}(%rip), %mm0
 ; X64-NEXT:    paddw %mm1, %mm0
 ; X64-NEXT:    pmuludq %mm7, %mm0
 ; X64-NEXT:    pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
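
As a reference for reading the updated CHECK lines, the before/after change in
the generated x86-64 assembly looks like the sketch below (the .LCPI0_0 label
name is compiler-assigned and shown only for illustration):

    # Before: in non-PIC builds the rematerialized constant-pool load used an
    # absolute address; the disp32-only form needs an extra SIB byte.
    vucomisd .LCPI0_0, %xmm0

    # After: RIP-relative addressing is used unconditionally on 64-bit, which is
    # always valid in the small/kernel code models and encodes one byte shorter.
    vucomisd .LCPI0_0(%rip), %xmm0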