From 9701053517100045ca9fb0fd81233314ab08f600 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 1 Aug 2022 10:51:30 +0800 Subject: [PATCH] Introduce @llvm.threadlocal.address intrinsic to access TLS variable This belongs to a series of patches which try to solve the thread identification problem in coroutines. See https://discourse.llvm.org/t/address-thread-identification-problems-with-coroutine/62015 for a full background. The problem consists of two concrete problems: TLS variable and readnone functions. This patch tries to convert the TLS problem to readnone problem by converting the access of TLS variable to an intrinsic which is marked as readnone. The readnone problem would be addressed in following patches. Reviewed By: nikic, jyknight, nhaehnle, ychen Differential Revision: https://reviews.llvm.org/D125291 --- llvm/docs/LangRef.rst | 24 +++++++++++++ .../llvm/Analysis/TargetTransformInfoImpl.h | 1 + llvm/include/llvm/IR/IRBuilder.h | 3 ++ llvm/include/llvm/IR/Intrinsics.td | 4 +++ .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 +++ llvm/lib/IR/IRBuilder.cpp | 7 ++++ llvm/test/CodeGen/X86/threadlocal_address.ll | 41 ++++++++++++++++++++++ 7 files changed, 84 insertions(+) create mode 100644 llvm/test/CodeGen/X86/threadlocal_address.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 28d4121..117380a 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -24545,6 +24545,30 @@ information on the *based on* terminology see mask argument does not match the pointer size of the target, the mask is zero-extended or truncated accordingly. +.. _int_threadlocal_address: + +'``llvm.threadlocal.address``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn + +Arguments: +"""""""""" + +The first argument is a pointer, which refers to a thread local global. 
+ +Semantics: +"""""""""" + +The address of a thread local global is not a constant, since it depends on +the calling thread. The `llvm.threadlocal.address` intrinsic returns the +address of the given thread local global in the calling thread. + .. _int_vscale: '``llvm.vscale``' Intrinsic diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index eb1e688..289721d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -650,6 +650,7 @@ public: case Intrinsic::coro_align: case Intrinsic::coro_suspend: case Intrinsic::coro_subfn_addr: + case Intrinsic::threadlocal_address: // These intrinsics don't actually represent code after lowering. return 0; } diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 6e559bb..879084e 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -753,6 +753,9 @@ public: /// If the pointer isn't i8* it will be converted. CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr); + /// Create a call to llvm.threadlocal.address intrinsic. + CallInst *CreateThreadLocalAddress(Value *Ptr); + /// Create a call to Masked Load intrinsic CallInst *CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru = nullptr, const Twine &Name = ""); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index d46fa4f..1a43ac3 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1404,6 +1404,10 @@ def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +// Intrinsic to wrap a thread local variable. 
+def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [], [IntrNoMem]>; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 35650b9..2239148 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7178,6 +7178,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.getZExtOrTrunc(Const, sdl, PtrVT))); return; } + case Intrinsic::threadlocal_address: { + setValue(&I, getValue(I.getOperand(0))); + return; + } case Intrinsic::get_active_lane_mask: { EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Index = getValue(I.getOperand(0)); diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 137d6ea..0b11fb6 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -526,6 +526,13 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) { return CreateCall(TheFn, Ops); } +CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) { + assert(isa<GlobalValue>(Ptr) && cast<GlobalValue>(Ptr)->isThreadLocal() && + "threadlocal_address only applies to thread local variables."); + return CreateIntrinsic(llvm::Intrinsic::threadlocal_address, {Ptr->getType()}, + {Ptr}); +} + CallInst * IRBuilderBase::CreateAssumption(Value *Cond, ArrayRef<OperandBundleDef> OpBundles) { diff --git a/llvm/test/CodeGen/X86/threadlocal_address.ll b/llvm/test/CodeGen/X86/threadlocal_address.ll new file mode 100644 index 0000000..7a641c7 --- /dev/null +++ b/llvm/test/CodeGen/X86/threadlocal_address.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -stop-after=finalize-isel %s -o - | FileCheck %s + +@i = thread_local global i32 0, align 4 + +define noundef i32 @foo() { +; CHECK: %0:gr64 = MOV64rm $rip, 1, $noreg, 
target-flags(x86-gottpoff) @i, $noreg :: (load (s64) from got) +; CHECK: %1:gr32 = MOV32rm %0, 1, $noreg, 0, $fs :: (load (s32) from %ir.0) +; CHECK: %2:gr32 = nsw INC32r %1, implicit-def dead $eflags +; CHECK: MOV32mr %0, 1, $noreg, 0, $fs, %2 :: (store (s32) into %ir.0) +; CHECK: $eax = COPY %2 +; CHECK: RET 0, $eax +entry: + %0 = call ptr @llvm.threadlocal.address(ptr @i) + %1 = load i32, ptr %0, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, ptr %0, align 4 + %2 = call ptr @llvm.threadlocal.address(ptr @i) + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + +@j = thread_local addrspace(1) global i32 addrspace(0)* @i, align 4 +define noundef i32 @bar() { +; CHECK: %0:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gottpoff) @j, $noreg :: (load (s64) from got) +; CHECK: %1:gr32 = MOV32rm %0, 1, $noreg, 0, $fs :: (load (s32) from %ir.0, addrspace 1) +; CHECK: %2:gr32 = nsw INC32r %1, implicit-def dead $eflags +; CHECK: MOV32mr %0, 1, $noreg, 0, $fs, %2 :: (store (s32) into %ir.0, addrspace 1) +; CHECK: $eax = COPY %2 +; CHECK: RET 0, $eax +entry: + %0 = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @j) + %1 = load i32, ptr addrspace(1) %0, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, ptr addrspace(1) %0, align 4 + %2 = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @j) + %3 = load i32, ptr addrspace(1) %2, align 4 + ret i32 %3 +} + +declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn +declare ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1)) nounwind readnone willreturn -- 2.7.4