From 3fe4bd464cc647da36a5c4f4c0015fb653f0e3b1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 24 Feb 2019 19:33:37 +0000 Subject: [PATCH] [X86] Fix tls variable lowering issue with large code model Summary: The problem here is the lowering of TLS variables. Below is the DAG for the code. SelectionDAG has 11 nodes: t0: ch = EntryToken t8: i64,ch = load<(load 8 from `i8 addrspace(257)* null`, addrspace 257)> t0, Constant:i64<0>, undef:i64 t10: i64 = X86ISD::WrapperRIP TargetGlobalTLSAddress:i64 0 [TF=10] t11: i64,ch = load<(load 8 from got)> t0, t10, undef:i64 t12: i64 = add t8, t11 t4: i32,ch = load<(dereferenceable load 4 from @x)> t0, t12, undef:i64 t6: ch = CopyToReg t0, Register:i32 %0, t4 When the code model is large, the nodes below cannot be folded. t10: i64 = X86ISD::WrapperRIP TargetGlobalTLSAddress:i64 0 [TF=10] t11: i64,ch = load<(load 8 from got)> t0, t10, undef:i64 So "t11: i64,ch = load<(load 8 from got)> t0, t10, undef:i64" is lowered to " Morphed node: t11: i64,ch = MOV64rm t10, TargetConstant:i8<1>, Register:i64 $noreg, TargetConstant:i32<0>, Register:i32 $noreg, t0" When LLVM then starts to lower "t10: i64 = X86ISD::WrapperRIP TargetGlobalTLSAddress:i64 0 [TF=10]", it fails. The fix is to fold the X86ISD::WrapperRIP into the load's addressing mode. 
Fixes PR26906 Patch by LuoYuanke Reviewers: craig.topper, rnk, annita.zhang, wxiao3 Reviewed By: rnk Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D58336 llvm-svn: 354756 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 18 +++++-- llvm/test/CodeGen/X86/code-model-elf.ll | 66 +++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 25fb5d746984..5f051b39cd53 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1137,15 +1137,23 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { if (AM.hasSymbolicDisplacement()) return true; + bool IsRIPRelTLS = false; bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP; + if (IsRIPRel) { + SDValue Val = N.getOperand(0); + if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) + IsRIPRelTLS = true; + } - // We can't use an addressing mode in the 64-bit large code model. In the - // medium code model, we use can use an mode when RIP wrappers are present. - // That signifies access to globals that are known to be "near", such as the - // GOT itself. + // We can't use an addressing mode in the 64-bit large code model. + // Global TLS addressing is an exception. In the medium code model, + // we can use a mode when RIP wrappers are present. + // That signifies access to globals that are known to be "near", + // such as the GOT itself. CodeModel::Model M = TM.getCodeModel(); if (Subtarget->is64Bit() && - (M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel))) + ((M == CodeModel::Large && !IsRIPRelTLS) || + (M == CodeModel::Medium && !IsRIPRel))) return true; // Base and index reg must be 0 in order to use %rip as base. 
diff --git a/llvm/test/CodeGen/X86/code-model-elf.ll b/llvm/test/CodeGen/X86/code-model-elf.ll index 56d3f4c102f0..f7ffd6ea1eb7 100644 --- a/llvm/test/CodeGen/X86/code-model-elf.ll +++ b/llvm/test/CodeGen/X86/code-model-elf.ll @@ -37,6 +37,8 @@ target triple = "x86_64--linux" @global_data = dso_local global [10 x i32] [i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16 @static_data = internal global [10 x i32] zeroinitializer, align 16 @extern_data = external global [10 x i32], align 16 +@thread_data = external thread_local global i32, align 4 + define dso_local i32* @lea_static_data() #0 { ; SMALL-STATIC-LABEL: lea_static_data: @@ -373,6 +375,70 @@ define dso_local void ()* @lea_extern_fn() #0 { ret void ()* @extern_fn } +; FIXME: The result is the same for the small, medium and large models because +; we specify the pie option in the test case, and the TLS model is initial-exec. +; For PIC code, the large model code for TLS should be emitted as below. + +; .L3: +; leaq .L3(%rip), %rbx +; movabsq $_GLOBAL_OFFSET_TABLE_-.L3, %r11 +; addq %r11, %rbx +; leaq thread_data@TLSGD(%rip), %rdi +; movabsq $__tls_get_addr@PLTOFF, %rax +; addq %rbx, %rax +; call *%rax +; movl (%rax), %eax + +; The medium and small model code for PIC TLS should be emitted as below. 
+; data16 +; leaq thread_data@TLSGD(%rip), %rdi +; data16 +; data16 +; rex64 +; callq __tls_get_addr@PLT +; movl (%rax), %eax + +define dso_local i32 @load_thread_data() #0 { +; SMALL-STATIC-LABEL: load_thread_data: +; SMALL-STATIC: # %bb.0: +; SMALL-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; SMALL-STATIC-NEXT: movl %fs:(%rax), %eax +; SMALL-STATIC-NEXT: retq +; +; MEDIUM-STATIC-LABEL: load_thread_data: +; MEDIUM-STATIC: # %bb.0: +; MEDIUM-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; MEDIUM-STATIC-NEXT: movl %fs:(%rax), %eax +; MEDIUM-STATIC-NEXT: retq +; +; LARGE-STATIC-LABEL: load_thread_data: +; LARGE-STATIC: # %bb.0: +; LARGE-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; LARGE-STATIC-NEXT: movl %fs:(%rax), %eax +; LARGE-STATIC-NEXT: retq +; +; SMALL-PIC-LABEL: load_thread_data: +; SMALL-PIC: # %bb.0: +; SMALL-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; SMALL-PIC-NEXT: movl %fs:(%rax), %eax +; SMALL-PIC-NEXT: retq +; +; MEDIUM-PIC-LABEL: load_thread_data: +; MEDIUM-PIC: # %bb.0: +; MEDIUM-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; MEDIUM-PIC-NEXT: movl %fs:(%rax), %eax +; MEDIUM-PIC-NEXT: retq +; +; LARGE-PIC-LABEL: load_thread_data: +; LARGE-PIC: # %bb.0: +; LARGE-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; LARGE-PIC-NEXT: movl %fs:(%rax), %eax +; LARGE-PIC-NEXT: retq +; + %1 = load i32, i32* @thread_data, align 4 + ret i32 %1 +} + attributes #0 = { noinline nounwind uwtable } !llvm.module.flags = !{!0, !1, !2} -- 2.34.1