[NativeAOT] ThreadStatics part 2 (#87148)
author Vladimir Sadov <vsadov@microsoft.com>
Fri, 23 Jun 2023 16:59:16 +0000 (09:59 -0700)
committer GitHub <noreply@github.com>
Fri, 23 Jun 2023 16:59:16 +0000 (09:59 -0700)
* introduced TlsRootNode

* remove RhGetInlinedThreadStaticStorage

* get rid of c++ tls_InlinedThreadStatics

* remove GetSingleTypeManager

* use .tdata on unix

* do not switch OSX just yet

* bring back tls_InlinedThreadStatics on Windows temporarily

* emit inline access on windows

* unify tls sections

* inline TLS access on linux-x64

* no need for RhpGetInlinedThreadStaticBase when inlining the access

* some comments and TODOs

* enable ILC generation of tls_InlinedThreadStatics on win-x64

* allow storage inlining in multimodule case when TLS access is inlined

* disable "Initial Exec" optimizations

* some comments and formatting

* follow up change that was suggested in the previous PR

* Remove use of RhpGetInlinedThreadStaticBase on x64

* Remove use of RhpGetInlinedThreadStaticBase on arm64

* removed tls_InlinedThreadStatics

* a few cleanups/typos

* fix after rebase

* inlined TLS support for linux-arm64

* PR feedback

27 files changed:
src/coreclr/nativeaot/Runtime.Base/src/System/Object.cs
src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
src/coreclr/nativeaot/Runtime/RuntimeInstance.h
src/coreclr/nativeaot/Runtime/amd64/MiscStubs.S
src/coreclr/nativeaot/Runtime/amd64/MiscStubs.asm
src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S
src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm
src/coreclr/nativeaot/Runtime/threadstore.cpp
src/coreclr/nativeaot/Runtime/threadstore.inl
src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc
src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/ThreadStatics.cs
src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs
src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectNodeSection.cs
src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs
src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs
src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_X64/X64Emitter.cs
src/coreclr/tools/Common/TypeSystem/Common/Utilities/GCPointerMap.Algorithm.cs
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/HelperEntrypoint.cs
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ObjectWriter.cs
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs [new file with mode: 0644]
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeThreadStaticIndexNode.cs
src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj
src/coreclr/tools/aot/ILCompiler/Program.cs

index b7e3d15..08d6279 100644 (file)
@@ -45,9 +45,6 @@ namespace System
             }
         }
 
-        [Runtime.CompilerServices.Intrinsic]
-        internal static extern MethodTable* MethodTableOf<T>();
-
         internal EETypePtr EETypePtr
         {
             get
index 2d1a697..e30e895 100644 (file)
@@ -267,25 +267,6 @@ RuntimeInstance::TypeManagerList& RuntimeInstance::GetTypeManagerList()
     return m_TypeManagerList;
 }
 
-TypeManager* RuntimeInstance::GetSingleTypeManager()
-{
-    auto head = m_TypeManagerList.GetHead();
-    if (head != NULL && head->m_pNext == NULL)
-    {
-        return head->m_pTypeManager;
-    }
-
-    return NULL;
-}
-
-COOP_PINVOKE_HELPER(TypeManagerHandle, RhGetSingleTypeManager, ())
-{
-    TypeManager* typeManager = GetRuntimeInstance()->GetSingleTypeManager();
-    ASSERT(typeManager != NULL);
-
-    return TypeManagerHandle::Create(typeManager);
-}
-
 // static
 bool RuntimeInstance::Initialize(HANDLE hPalInstance)
 {
index 06db508..2de7c22 100644 (file)
@@ -99,7 +99,6 @@ public:
 
     bool RegisterTypeManager(TypeManager * pTypeManager);
     TypeManagerList& GetTypeManagerList();
-    TypeManager* GetSingleTypeManager();
     OsModuleList* GetOsModuleList();
 
     bool RegisterUnboxingStubs(PTR_VOID pvStartRange, uint32_t cbRange);
index c2247d8..34acbfe 100644 (file)
@@ -44,22 +44,3 @@ LOCAL_LABEL(ProbeLoop):
         RESET_FRAME_WITH_RBP
         ret
 NESTED_END RhpStackProbe, _TEXT
-
-#ifndef TARGET_ANDROID
-NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler
-        // On exit:
-        //   rax - the thread static base for the given type
-
-        // rdi = &tls_InlinedThreadStatics
-        INLINE_GET_TLS_VAR tls_InlinedThreadStatics
-        mov     rdi, rax
-
-        // get per-thread storage
-        mov     rax, [rdi]
-        test    rax, rax
-        jz      C_FUNC(RhpGetInlinedThreadStaticBaseSlow)    // rdi contains the storage ref
-
-        // return it
-        ret
-NESTED_END RhpGetInlinedThreadStaticBase, _TEXT
-#endif
index c4f39f5..c3eb1fc 100644 (file)
@@ -3,8 +3,6 @@
 
 include AsmMacros.inc
 
-EXTERN RhpGetInlinedThreadStaticBaseSlow : PROC
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; The following helper will access ("probe") a word on each page of the stack
 ; starting with the page right beneath rsp down to the one pointed to by r11.
@@ -39,20 +37,4 @@ ProbeLoop:
 
 LEAF_END RhpStackProbe, _TEXT
 
-LEAF_ENTRY RhpGetInlinedThreadStaticBase, _TEXT
-        ; On exit:
-        ;   rax - the thread static base for the given type
-
-        ;; rcx = &tls_InlinedThreadStatics, TRASHES r8
-        INLINE_GET_TLS_VAR rcx, r8, tls_InlinedThreadStatics
-
-        ;; get per-thread storage
-        mov     rax, [rcx]
-        test    rax, rax
-        jz      RhpGetInlinedThreadStaticBaseSlow   ;; rcx contains the storage ref
-
-        ;; return it
-        ret
-LEAF_END RhpGetInlinedThreadStaticBase, _TEXT
-
 end
index 34af83c..ea5d91a 100644 (file)
@@ -3,24 +3,3 @@
 
 #include <unixasmmacros.inc>
 #include "AsmOffsets.inc"
-
-#ifndef TARGET_ANDROID
-NESTED_ENTRY RhpGetInlinedThreadStaticBase, _TEXT, NoHandler
-        // On exit:
-        //   x0 - the thread static base for the given type
-
-        // x1 = GetThread()
-        INLINE_GET_TLS_VAR x1, C_FUNC(tls_InlinedThreadStatics)
-
-        // get per-thread storage
-        ldr     x0, [x1]
-        cbnz    x0, HaveValue
-        mov     x0, x1
-        b       C_FUNC(RhpGetInlinedThreadStaticBaseSlow)
-
-HaveValue:
-        // return it
-        ret
-
-NESTED_END RhpGetInlinedThreadStaticBase, _TEXT
-#endif
index cdb076b..49baea4 100644 (file)
@@ -3,25 +3,6 @@
 
 #include "AsmMacros.h"
 
-    EXTERN RhpGetInlinedThreadStaticBaseSlow
-
     TEXTAREA
 
-;; On exit:
-;;   x0 - the thread static base for the given type
-    LEAF_ENTRY RhpGetInlinedThreadStaticBase
-        ;; x1 = &tls_InlinedThreadStatics, TRASHES x2
-        INLINE_GET_TLS_VAR x1, x2, tls_InlinedThreadStatics
-
-        ;; get per-thread storage
-        ldr     x0, [x1]
-        cbnz    x0, HaveValue
-        mov     x0, x1
-        b       RhpGetInlinedThreadStaticBaseSlow
-
-HaveValue
-        ;; return it
-        ret
-    LEAF_END RhpGetInlinedThreadStaticBase
-
     end
index c65d957..67a6949 100644 (file)
@@ -430,11 +430,6 @@ C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer));
 
 #ifndef _MSC_VER
 __thread ThreadBuffer tls_CurrentThread;
-
-// the root of inlined threadstatics storage
-// there is only one now,
-// eventually this will be emitted by ILC and we may have more than one such variable
-__thread InlinedThreadStaticRoot tls_InlinedThreadStatics;
 #endif
 
 EXTERN_C ThreadBuffer* RhpGetThread()
@@ -442,11 +437,6 @@ EXTERN_C ThreadBuffer* RhpGetThread()
     return &tls_CurrentThread;
 }
 
-COOP_PINVOKE_HELPER(Object**, RhGetInlinedThreadStaticStorage, ())
-{
-    return &tls_InlinedThreadStatics.m_threadStaticsBase;
-}
-
 #endif // !DACCESS_COMPILE
 
 #ifdef _WIN32
index 6fe750f..2949504 100644 (file)
@@ -4,14 +4,8 @@
 #ifdef _MSC_VER
 // a workaround to prevent tls_CurrentThread from becoming dynamically checked/initialized.
 EXTERN_C __declspec(selectany) __declspec(thread) ThreadBuffer tls_CurrentThread;
-
-// the root of inlined threadstatics storage
-// there is only one now,
-// eventually this will be emitted by ILC and we may have more than one such variable
-EXTERN_C __declspec(selectany) __declspec(thread) InlinedThreadStaticRoot tls_InlinedThreadStatics;
 #else
 EXTERN_C __thread ThreadBuffer tls_CurrentThread;
-EXTERN_C __thread InlinedThreadStaticRoot tls_InlinedThreadStatics;
 #endif
 
 // static
index 1aaf7c5..ab64ef9 100644 (file)
@@ -288,9 +288,12 @@ C_FUNC(\Name):
         movq    _\Var@TLVP(%rip), %rdi
         callq   *(%rdi)
 #else
-        leaq    \Var@TLSLD(%rip), %rdi
+        .byte 0x66  // data16 prefix - padding to have space for linker relaxations
+        leaq    \Var@TLSGD(%rip), %rdi
+        .byte 0x66  //
+        .byte 0x66  //
+        .byte 0x48  // rex.W prefix, also for padding
         callq   __tls_get_addr@PLT
-        addq    $\Var@DTPOFF, %rax
 #endif
        .intel_syntax noprefix
 .endm
index 9734e2a..7bfcbff 100644 (file)
@@ -17,28 +17,26 @@ namespace Internal.Runtime
     /// </summary>
     internal static class ThreadStatics
     {
+        [ThreadStatic]
+        private static object t_inlinedThreadStaticBase;
+
         /// <summary>
         /// This method is called from a ReadyToRun helper to get base address of thread
         /// static storage for the given type.
         /// </summary>
         internal static unsafe object GetThreadStaticBaseForType(TypeManagerSlot* pModuleData, int typeTlsIndex)
         {
-            if (typeTlsIndex >= 0)
-                return GetUninlinedThreadStaticBaseForType(pModuleData, typeTlsIndex);
-
-            ref object? threadStorage = ref RuntimeImports.RhGetInlinedThreadStaticStorage();
-            if (threadStorage != null)
-                return threadStorage;
+            if (typeTlsIndex < 0)
+                return t_inlinedThreadStaticBase;
 
-            return GetInlinedThreadStaticBaseSlow(ref threadStorage);
+            return GetUninlinedThreadStaticBaseForType(pModuleData, typeTlsIndex);
         }
 
-        [RuntimeExport("RhpGetInlinedThreadStaticBaseSlow")]
         internal static unsafe object GetInlinedThreadStaticBaseSlow(ref object? threadStorage)
         {
             Debug.Assert(threadStorage == null);
             // Allocate an object that will represent a memory block for all thread static fields
-            TypeManagerHandle typeManager = RuntimeImports.RhGetSingleTypeManager();
+            TypeManagerHandle typeManager = EETypePtr.EETypePtrOf<object>().ToPointer()->TypeManager;
             object threadStaticBase = AllocateThreadStaticStorageForType(typeManager, 0);
 
             // register the storage location with the thread for GC reporting.
@@ -46,6 +44,8 @@ namespace Internal.Runtime
 
             // assign the storage block to the storage variable and return
             threadStorage = threadStaticBase;
+            t_inlinedThreadStaticBase = threadStaticBase;
+
             return threadStaticBase;
         }
 
@@ -55,13 +55,13 @@ namespace Internal.Runtime
             int moduleIndex = pModuleData->ModuleIndex;
             Debug.Assert(moduleIndex >= 0);
 
-            object[][] threadStorage = RuntimeImports.RhGetThreadStaticStorage();
-            if (threadStorage != null && threadStorage.Length > moduleIndex)
+            object[][] perThreadStorage = RuntimeImports.RhGetThreadStaticStorage();
+            if (perThreadStorage != null && perThreadStorage.Length > moduleIndex)
             {
-                object[] moduleStorage = threadStorage[moduleIndex];
-                if (moduleStorage != null && moduleStorage.Length > typeTlsIndex)
+                object[] perModuleStorage = perThreadStorage[moduleIndex];
+                if (perModuleStorage != null && perModuleStorage.Length > typeTlsIndex)
                 {
-                    object threadStaticBase = moduleStorage[typeTlsIndex];
+                    object threadStaticBase = perModuleStorage[typeTlsIndex];
                     if (threadStaticBase != null)
                     {
                         return threadStaticBase;
index 68b50d5..c6ac7d6 100644 (file)
@@ -561,10 +561,6 @@ namespace System.Runtime
         internal static extern ref object[][] RhGetThreadStaticStorage();
 
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        [RuntimeImport(RuntimeLibrary, "RhGetInlinedThreadStaticStorage")]
-        internal static extern ref object? RhGetInlinedThreadStaticStorage();
-
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "RhRegisterInlinedThreadStaticRoot")]
         internal static extern void RhRegisterInlinedThreadStaticRoot(ref object? root);
 
@@ -592,10 +588,6 @@ namespace System.Runtime
         [RuntimeImport(RuntimeLibrary, "RhGetTargetOfUnboxingAndInstantiatingStub")]
         public static extern IntPtr RhGetTargetOfUnboxingAndInstantiatingStub(IntPtr pCode);
 
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        [RuntimeImport(RuntimeLibrary, "RhGetSingleTypeManager")]
-        public static extern TypeManagerHandle RhGetSingleTypeManager();
-
         //
         // EH helpers
         //
index ff5cee8..67c9931 100644 (file)
@@ -291,6 +291,8 @@ namespace ILCompiler.DependencyAnalysis
                 case RelocType.IMAGE_REL_BASED_ABSOLUTE:
                 case RelocType.IMAGE_REL_BASED_HIGHLOW:
                 case RelocType.IMAGE_REL_SECREL:
+                case RelocType.IMAGE_REL_TLSGD:
+                case RelocType.IMAGE_REL_TPOFF:
                 case RelocType.IMAGE_REL_FILE_ABSOLUTE:
                 case RelocType.IMAGE_REL_BASED_ADDR32NB:
                 case RelocType.IMAGE_REL_SYMBOL_SIZE:
@@ -305,6 +307,14 @@ namespace ILCompiler.DependencyAnalysis
                 case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21:
                 case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L:
                 case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A:
+
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21:
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12:
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12:
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL:
+                case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_HI12:
+                case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
+
                 case RelocType.IMAGE_REL_BASED_LOONGARCH64_PC:
                 case RelocType.IMAGE_REL_BASED_LOONGARCH64_JIR:
                     Debug.Assert(delta == 0);
index 6f0c0be..4545833 100644 (file)
@@ -46,7 +46,7 @@ namespace ILCompiler.DependencyAnalysis
         public static readonly ObjectNodeSection ReadOnlyDataSection = new ObjectNodeSection("rdata", SectionType.ReadOnly);
         public static readonly ObjectNodeSection FoldableReadOnlyDataSection = new ObjectNodeSection("rdata", SectionType.ReadOnly);
         public static readonly ObjectNodeSection TextSection = new ObjectNodeSection("text", SectionType.Executable);
-        public static readonly ObjectNodeSection TLSSection = new ObjectNodeSection("TLS", SectionType.Writeable);
+        public static readonly ObjectNodeSection TLSSection = new ObjectNodeSection("tdata", SectionType.Writeable);
         public static readonly ObjectNodeSection BssSection = new ObjectNodeSection("bss", SectionType.Uninitialized);
         public static readonly ObjectNodeSection HydrationTargetSection = new ObjectNodeSection("hydrated", SectionType.Uninitialized);
         public static readonly ObjectNodeSection ManagedCodeWindowsContentSection = new ObjectNodeSection(".managedcode$I", SectionType.Executable);
index 25b94a3..b833c90 100644 (file)
@@ -23,13 +23,35 @@ namespace ILCompiler.DependencyAnalysis
                                                        // This is a special NGEN-specific relocation type
                                                        // for relative pointer (used to make NGen relocation
                                                        // section smaller)
-        IMAGE_REL_SECREL                     = 0x80,   // 32 bit offset from base of section containing target
 
         IMAGE_REL_BASED_ARM64_PAGEBASE_REL21 = 0x81,   // ADRP
         IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A = 0x82,   // ADD/ADDS (immediate) with zero shift, for page offset
         IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L = 0x83,   // LDR (indexed, unsigned immediate), for page offset
 
         //
+        // Relocation operators related to TLS access
+        //
+
+        // Windows x64
+        IMAGE_REL_SECREL                     = 0x104,
+
+        // Linux x64
+        // GD model
+        IMAGE_REL_TLSGD                      = 0x105,
+        // LE model
+        IMAGE_REL_TPOFF                      = 0x106,
+
+        // Linux arm64
+        //    TLSDESC  (dynamic)
+        IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21 = 0x107,
+        IMAGE_REL_AARCH64_TLSDESC_LD64_LO12  = 0x108,
+        IMAGE_REL_AARCH64_TLSDESC_ADD_LO12   = 0x109,
+        IMAGE_REL_AARCH64_TLSDESC_CALL       = 0x10A,
+        //    LE model
+        IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_HI12    = 0x10B,
+        IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_LO12_NC = 0x10C,
+
+        //
         // Relocations for R2R image production
         //
         IMAGE_REL_SYMBOL_SIZE                = 0x1000, // The size of data in the image represented by the target symbol node
@@ -459,6 +481,8 @@ namespace ILCompiler.DependencyAnalysis
                 case RelocType.IMAGE_REL_BASED_REL32:
                 case RelocType.IMAGE_REL_BASED_RELPTR32:
                 case RelocType.IMAGE_REL_SECREL:
+                case RelocType.IMAGE_REL_TLSGD:
+                case RelocType.IMAGE_REL_TPOFF:
                 case RelocType.IMAGE_REL_FILE_ABSOLUTE:
                 case RelocType.IMAGE_REL_SYMBOL_SIZE:
                     return *(int*)location;
@@ -475,6 +499,20 @@ namespace ILCompiler.DependencyAnalysis
                     return GetArm64Rel21((uint*)location);
                 case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A:
                     return GetArm64Rel12((uint*)location);
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12:
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12:
+                case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_HI12:
+                case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
+                    // TLS relocs do not have offsets
+                    Debug.Assert((GetArm64Rel12((uint*)location) & 0xFF) == 0);
+                    return 0;
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21:
+                    // TLS relocs do not have offsets
+                    Debug.Assert((GetArm64Rel21((uint*)location) & 0xFF) == 0);
+                    return 0;
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL:
+                    // TLS relocs do not have offsets
+                    return 0;
                 case RelocType.IMAGE_REL_BASED_LOONGARCH64_PC:
                     return (long)GetLoongArch64PC12((uint*)location);
                 case RelocType.IMAGE_REL_BASED_LOONGARCH64_JIR:
index 45b5018..d516b57 100644 (file)
@@ -176,10 +176,18 @@ namespace ILCompiler.DependencyAnalysis.ARM64
 
         public void EmitRETIfEqual()
         {
+            // b.ne #8
             Builder.EmitUInt(0b01010100_0000000000000000010_0_0001u);
             EmitRET();
         }
 
+        public void EmitRETIfNotEqual()
+        {
+            // b.eq #8
+            Builder.EmitUInt(0b01010100_0000000000000000010_0_0000u);
+            EmitRET();
+        }
+
         public void EmitJE(ISymbolNode symbol)
         {
             uint offset = symbol.RepresentsIndirectionCell ? 6u : 2u;
@@ -189,6 +197,15 @@ namespace ILCompiler.DependencyAnalysis.ARM64
             EmitJMP(symbol);
         }
 
+        public void EmitJNE(ISymbolNode symbol)
+        {
+            uint offset = symbol.RepresentsIndirectionCell ? 6u : 2u;
+
+            Builder.EmitUInt(0b01010100_0000000000000000000_0_0000u | offset << 5);
+
+            EmitJMP(symbol);
+        }
+
         private static bool InSignedByteRange(int i)
         {
             return i == (int)(sbyte)i;
index b88bb7f..321da23 100644 (file)
@@ -117,6 +117,20 @@ namespace ILCompiler.DependencyAnalysis.X64
             }
         }
 
+        public void EmitJNE(ISymbolNode symbol)
+        {
+            if (symbol.RepresentsIndirectionCell)
+            {
+                throw new NotImplementedException();
+            }
+            else
+            {
+                Builder.EmitByte(0x0f);
+                Builder.EmitByte(0x85);
+                Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_REL32);
+            }
+        }
+
         public void EmitINT3()
         {
             Builder.EmitByte(0xCC);
index fd21b72..2907ba2 100644 (file)
@@ -101,7 +101,7 @@ namespace Internal.TypeSystem
             return builder.ToGCMap();
         }
 
-        private static void MapThreadStaticsForType(GCPointerMapBuilder builder, MetadataType type, int baseOffset)
+        private static void MapThreadStaticsForType(ref GCPointerMapBuilder builder, MetadataType type, int baseOffset)
         {
             foreach (FieldDesc field in type.GetFields())
             {
@@ -133,7 +133,7 @@ namespace Internal.TypeSystem
         {
             GCPointerMapBuilder builder = new GCPointerMapBuilder(type.ThreadGcStaticFieldSize.AsInt, type.Context.Target.PointerSize);
 
-            MapThreadStaticsForType(builder, type, baseOffset: 0);
+            MapThreadStaticsForType(ref builder, type, baseOffset: 0);
 
             Debug.Assert(builder.ToGCMap().Size * type.Context.Target.PointerSize >= type.ThreadGcStaticFieldSize.AsInt);
             return builder.ToGCMap();
@@ -148,7 +148,7 @@ namespace Internal.TypeSystem
             GCPointerMapBuilder builder = new GCPointerMapBuilder(threadStaticSize, pointerSize);
             foreach (var type in types)
             {
-                MapThreadStaticsForType(builder, type, offsets[type]);
+                MapThreadStaticsForType(ref builder, type, offsets[type]);
             }
 
             return builder.ToGCMap();
index c739d0d..7a21199 100644 (file)
@@ -9,5 +9,6 @@ namespace ILCompiler.DependencyAnalysis
         EnsureClassConstructorRunAndReturnNonGCStaticBase,
         EnsureClassConstructorRunAndReturnThreadStaticBase,
         GetThreadStaticBaseForType,
+        GetInlinedThreadStaticBaseSlow,
     }
 }
index 415f887..6f17af2 100644 (file)
@@ -1041,7 +1041,8 @@ namespace ILCompiler.DependencyAnalysis
             new string[] { "System.Runtime.CompilerServices", "ClassConstructorRunner", "CheckStaticClassConstructionReturnGCStaticBase" },
             new string[] { "System.Runtime.CompilerServices", "ClassConstructorRunner", "CheckStaticClassConstructionReturnNonGCStaticBase" },
             new string[] { "System.Runtime.CompilerServices", "ClassConstructorRunner", "CheckStaticClassConstructionReturnThreadStaticBase" },
-            new string[] { "Internal.Runtime", "ThreadStatics", "GetThreadStaticBaseForType" }
+            new string[] { "Internal.Runtime", "ThreadStatics", "GetThreadStaticBaseForType" },
+            new string[] { "Internal.Runtime", "ThreadStatics", "GetInlinedThreadStaticBaseSlow" },
         };
 
         private ISymbolNode[] _helperEntrypointSymbols;
@@ -1288,6 +1289,8 @@ namespace ILCompiler.DependencyAnalysis
 
         protected internal TypeManagerIndirectionNode TypeManagerIndirection = new TypeManagerIndirectionNode();
 
+        protected internal TlsRootNode TlsRoot = new TlsRootNode();
+
         public virtual void AttachToDependencyGraph(DependencyAnalyzerBase<NodeFactory> graph)
         {
             ReadyToRunHeader = new ReadyToRunHeaderNode();
@@ -1306,6 +1309,7 @@ namespace ILCompiler.DependencyAnalysis
             if (_inlinedThreadStatics.IsComputed())
             {
                 graph.AddRoot(_inlinedThreadStatiscNode, "Inlined threadstatics are used if present");
+                graph.AddRoot(TlsRoot, "Inlined threadstatics are used if present");
             }
 
             ReadyToRunHeader.Add(ReadyToRunSectionType.GCStaticRegion, GCStaticsRegion);
index fb6adba..9e72410 100644 (file)
@@ -1079,6 +1079,13 @@ namespace ILCompiler.DependencyAnalysis
                                 case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21:
                                 case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A:
                                 case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L:
+
+                                case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_HI12:
+                                case RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
+                                case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21:
+                                case RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12:
+                                case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12:
+                                case RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL:
                                     unsafe
                                     {
                                         fixed (void* location = &nodeContents.Data[i])
index 8e43f12..83c2b3d 100644 (file)
@@ -74,26 +74,25 @@ namespace ILCompiler.DependencyAnalysis
                         ISortableSymbolNode index = factory.TypeThreadStaticIndex(target);
                         if (index is TypeThreadStaticIndexNode ti && ti.Type == null)
                         {
-                            ISymbolNode helper = factory.ExternSymbol("RhpGetInlinedThreadStaticBase");
-
                             if (!factory.PreinitializationManager.HasLazyStaticConstructor(target))
                             {
-                                encoder.EmitJMP(helper);
+                                EmitInlineTLSAccess(factory, ref encoder);
                             }
                             else
                             {
+                                // First arg: unused address of the TypeManager
+                                // encoder.EmitMOV(encoder.TargetRegister.Arg0, (ushort)0);
+
+                                // Second arg: ~0 (index of inlined storage)
+                                encoder.EmitMVN(encoder.TargetRegister.Arg1, 0);
+
                                 encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target));
                                 encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextSize(factory.Target));
-
                                 encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2);
                                 encoder.EmitCMP(encoder.TargetRegister.Arg3, 0);
-                                encoder.EmitJE(helper);
 
-                                // First arg: unused address of the TypeManager
-                                encoder.EmitMOV(encoder.TargetRegister.Arg0, (ushort)0);
-                                // Second arg: ~0 (index of inlined storage)
-                                encoder.EmitMVN(encoder.TargetRegister.Arg1, 0);
-                                encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase));
+                                encoder.EmitJNE(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase));
+                                EmitInlineTLSAccess(factory, ref encoder);
                             }
                         }
                         else
@@ -227,5 +226,81 @@ namespace ILCompiler.DependencyAnalysis
                     throw new NotImplementedException();
             }
         }
+
+        // emits code that results in ThreadStaticBase referenced in X0.
+        // may trash volatile registers. (there are calls to the slow helper and possibly to the platform's TLS support)
+        private static void EmitInlineTLSAccess(NodeFactory factory, ref ARM64Emitter encoder)
+        {
+            ISymbolNode getInlinedThreadStaticBaseSlow = factory.HelperEntrypoint(HelperEntrypoint.GetInlinedThreadStaticBaseSlow);
+            ISymbolNode tlsRoot = factory.TlsRoot;
+            // IsSingleFileCompilation is not enough to guarantee that we can use "Initial Executable" optimizations.
+            // we need a special compiler flag analogous to /GA. Just assume "false" for now.
+            // bool isInitialExecutable = factory.CompilationModuleGroup.IsSingleFileCompilation;
+            bool isInitialExecutable = false;
+
+            if (factory.Target.OperatingSystem == TargetOS.Linux)
+            {
+                if (isInitialExecutable)
+                {
+                    // mrs  x0, tpidr_el0
+                    encoder.Builder.EmitUInt(0xd53bd040);
+
+                    // add  x0, x0, #:tprel_hi12:tlsRoot, lsl #12
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_HI12);
+                    encoder.Builder.EmitUInt(0x91400000);
+
+                    // add  x1, x0, #:tprel_lo12_nc:tlsRoot, lsl #0
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_LO12_NC);
+                    encoder.Builder.EmitUInt(0x91000001);
+                }
+                else
+                {
+                    // stp     x29, x30, [sp, -16]!
+                    encoder.Builder.EmitUInt(0xa9bf7bfd);
+                    // mov     x29, sp
+                    encoder.Builder.EmitUInt(0x910003fd);
+
+                    // mrs     x1, tpidr_el0
+                    encoder.Builder.EmitUInt(0xd53bd041);
+
+                    // adrp    x0, :tlsdesc:tlsRoot
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21);
+                    encoder.Builder.EmitUInt(0x90000000);
+
+                    // ldr     x2, [x0, #:tlsdesc_lo12:tlsRoot]
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12);
+                    encoder.Builder.EmitUInt(0xf9400002);
+
+                    // add     x0, x0, :tlsdesc_lo12:tlsRoot
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12);
+                    encoder.Builder.EmitUInt(0x91000000);
+
+                    // blr     :tlsdesc_call:tlsRoot:x2
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL);
+                    encoder.Builder.EmitUInt(0xd63f0040);
+
+                    // add     x1, x1, x0
+                    encoder.Builder.EmitUInt(0x8b000021);
+
+                    // ldp     x29, x30, [sp], 16
+                    encoder.Builder.EmitUInt(0xa8c17bfd);
+                }
+
+                encoder.EmitLDR(Register.X0, Register.X1);
+
+                // here we have:
+                // X1: addr, X0: storage
+                // if the storage is already allocated, just return, otherwise do slow path.
+
+                encoder.EmitCMP(Register.X0, 0);
+                encoder.EmitRETIfNotEqual();
+                encoder.EmitMOV(Register.X0, Register.X1);
+                encoder.EmitJMP(getInlinedThreadStaticBaseSlow);
+            }
+            else
+            {
+                throw new NotImplementedException();
+            }
+        }
     }
 }
index 15c0a98..8fbd5f6 100644 (file)
@@ -74,25 +74,25 @@ namespace ILCompiler.DependencyAnalysis
                         ISortableSymbolNode index = factory.TypeThreadStaticIndex(target);
                         if (index is TypeThreadStaticIndexNode ti && ti.Type == null)
                         {
-                            ISymbolNode helper = factory.ExternSymbol("RhpGetInlinedThreadStaticBase");
-
                             if (!factory.PreinitializationManager.HasLazyStaticConstructor(target))
                             {
-                                encoder.EmitJMP(helper);
+                                EmitInlineTLSAccess(factory, ref encoder);
                             }
                             else
                             {
+                                // First arg: unused address of the TypeManager
+                                // encoder.EmitMOV(encoder.TargetRegister.Arg0, 0);
+
+                                // Second arg: -1 (index of inlined storage)
+                                encoder.EmitMOV(encoder.TargetRegister.Arg1, -1);
+
                                 encoder.EmitLEAQ(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target), -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target));
 
                                 AddrMode initialized = new AddrMode(encoder.TargetRegister.Arg2, null, 0, 0, AddrModeSize.Int64);
                                 encoder.EmitCMP(ref initialized, 0);
-                                encoder.EmitJE(helper);
+                                encoder.EmitJNE(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase));
 
-                                // First arg: unused address of the TypeManager
-                                encoder.EmitMOV(encoder.TargetRegister.Arg0, 0);
-                                // Second arg: -1 (index of inlined storage)
-                                encoder.EmitMOV(encoder.TargetRegister.Arg1, -1);
-                                encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase));
+                                EmitInlineTLSAccess(factory, ref encoder);
                             }
                         }
                         else
@@ -225,5 +225,99 @@ namespace ILCompiler.DependencyAnalysis
                     throw new NotImplementedException();
             }
         }
+
+        // Emits code that results in ThreadStaticBase referenced in RAX: loads the pointer stored at this thread's tlsRoot and returns it; if that pointer is zero, jumps to the GetInlinedThreadStaticBaseSlow helper instead (the tlsRoot address is left in RCX on Windows / RDI on Linux).
+        // May trash volatile registers (there are jumps/calls to the slow helper and possibly to the platform's TLS support, e.g. __tls_get_addr). Throws NotImplementedException for targets other than Windows and Linux.
+        private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter encoder)
+        {
+            ISymbolNode getInlinedThreadStaticBaseSlow = factory.HelperEntrypoint(HelperEntrypoint.GetInlinedThreadStaticBaseSlow);
+            ISymbolNode tlsRoot = factory.TlsRoot;
+            // IsSingleFileCompilation is not enough to guarantee that we can use "Initial Executable" optimizations.
+            // We need a special compiler flag analogous to /GA. Just assume "false" for now.
+            // bool isInitialExecutable = factory.CompilationModuleGroup.IsSingleFileCompilation;
+            bool isInitialExecutable = false; // always false for now, so only the else-branches below are actually emitted
+
+            if (factory.Target.IsWindows)
+            {
+                if (isInitialExecutable)
+                {
+                    // mov         rax,qword ptr gs:[58h]
+                    encoder.Builder.EmitBytes(new byte[] { 0x65, 0x48, 0x8B, 0x04, 0x25, 0x58, 0x00, 0x00, 0x00 });
+
+                    // mov         ecx, SECTIONREL tlsRoot      ; ecx = section-relative offset of tlsRoot
+                    encoder.Builder.EmitBytes(new byte[] { 0xB9 });
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_SECREL);
+
+                    // add         rcx,qword ptr [rax]          ; adds the first entry of the array at gs:[58h], i.e. no _tls_index lookup
+                    encoder.Builder.EmitBytes(new byte[] { 0x48, 0x03, 0x08 });
+                }
+                else
+                {
+                    // mov         ecx,dword ptr [_tls_index]   ; ecx = value of the external _tls_index symbol
+                    encoder.Builder.EmitBytes(new byte[] { 0x8B, 0x0D });
+                    encoder.Builder.EmitReloc(factory.ExternSymbol("_tls_index"), RelocType.IMAGE_REL_BASED_REL32);
+
+                    // mov         rax,qword ptr gs:[58h]       ; rax = base of the array indexed below
+                    encoder.Builder.EmitBytes(new byte[] { 0x65, 0x48, 0x8B, 0x04, 0x25, 0x58, 0x00, 0x00, 0x00 });
+
+                    // mov         rax,qword ptr [rax+rcx*8]    ; rax = array entry selected by _tls_index
+                    encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8B, 0x04, 0xC8 });
+
+                    // mov         ecx, SECTIONREL tlsRoot      ; ecx = section-relative offset of tlsRoot
+                    encoder.Builder.EmitBytes(new byte[] { 0xB9 });
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_SECREL);
+
+                    // add         rcx,rax                      ; rcx = address of tlsRoot for the current thread
+                    encoder.Builder.EmitBytes(new byte[] { 0x48, 0x01, 0xC1 });
+                }
+
+                // mov         rax,qword ptr [rcx]          ; rax = pointer stored in tlsRoot
+                encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8b, 0x01 });
+                encoder.EmitCompareToZero(Register.RAX);
+                encoder.EmitJE(getInlinedThreadStaticBaseSlow); // zero: storage not set up yet, jump to the slow helper (rcx still holds the tlsRoot address)
+                encoder.EmitRET();
+            }
+            else if (factory.Target.OperatingSystem == TargetOS.Linux)
+            {
+                if (isInitialExecutable)
+                {
+                    // movq %fs:0x0,%rax
+                    encoder.Builder.EmitBytes(new byte[] { 0x64, 0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 });
+
+                    // leaq tlsRoot@TPOFF(%rax), %rdi           ; rdi = rax + TPOFF offset of tlsRoot
+                    encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8D, 0xB8 });
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_TPOFF);
+                }
+                else
+                {
+                    // data16 leaq tlsRoot@TLSGD(%rip), %rdi    ; NOTE(review): the prefix padding looks like the standard general-dynamic TLS call sequence - confirm against the ELF TLS ABI
+                    encoder.Builder.EmitBytes(new byte[] { 0x66, 0x48, 0x8D, 0x3D });
+                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_TLSGD, -4);
+
+                    // data16 data16 rex.W callq __tls_get_addr@PLT
+                    encoder.Builder.EmitBytes(new byte[] { 0x66, 0x66, 0x48, 0xE8 });
+                    encoder.Builder.EmitReloc(factory.ExternSymbol("__tls_get_addr"), RelocType.IMAGE_REL_BASED_REL32);
+
+                    encoder.EmitMOV(Register.RDI, Register.RAX); // rdi = rax, the result of the __tls_get_addr call (the load below reads through rdi)
+                }
+
+                // mov  rax, qword ptr [rdi]                    ; rax = pointer stored in tlsRoot
+                encoder.Builder.EmitBytes(new byte[] { 0x48, 0x8B, 0x07 });
+                encoder.EmitCompareToZero(Register.RAX);
+                encoder.EmitJE(getInlinedThreadStaticBaseSlow); // zero: storage not set up yet, jump to the slow helper (rdi still holds the tlsRoot address)
+                encoder.EmitRET();
+            }
+            else if (factory.Target.IsOSXLike)
+            {
+                // movq _\Var@TLVP(%rip), %rdi
+                // callq *(%rdi)
+
+                throw new NotImplementedException(); // nothing is emitted for OSX-like targets; the two instructions above are only noted in a comment
+            }
+            else
+            {
+                throw new NotImplementedException();
+            }
+        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TlsRootNode.cs
new file mode 100644 (file)
index 0000000..424ba71
--- /dev/null
@@ -0,0 +1,40 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Internal.Text;
+
+namespace ILCompiler.DependencyAnalysis
+{
+    public class TlsRootNode : ObjectNode, ISymbolDefinitionNode // defines the "tls_InlinedThreadStatics" symbol: two zero-initialized pointer-sized slots placed in the TLS section
+    {
+        public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
+        {
+            sb.Append(nameMangler.CompilationUnitPrefix).Append("tls_InlinedThreadStatics"); // symbol name = compilation unit prefix + "tls_InlinedThreadStatics"
+        }
+        public int Offset => 0; // the symbol is added before any data is emitted (see GetData)
+        public override bool IsShareable => false;
+        // The node's name is its mangled symbol name.
+        protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler);
+        // The node's data goes into the TLS section.
+        public override ObjectNodeSection GetSection(NodeFactory factory) => ObjectNodeSection.TLSSection;
+
+        public override bool StaticDependenciesAreComputed => true;
+        // Builds the node's contents: requests initial pointer alignment, defines the symbol, then emits two zero pointers.
+        public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
+        {
+            ObjectDataBuilder objData = new ObjectDataBuilder(factory, relocsOnly);
+            objData.RequireInitialPointerAlignment();
+            objData.AddSymbol(this);
+
+            // root: pointer-sized slot, initially zero (the x64 inline TLS access code loads it and jumps to the slow helper when it is zero)
+            objData.EmitZeroPointer();
+
+            // next: second pointer-sized slot, initially zero (NOTE(review): its consumer is not visible here)
+            objData.EmitZeroPointer();
+
+            return objData.ToObjectData();
+        }
+
+        public override int ClassCode => -985742028;
+    }
+}
index 772970b..f91853d 100644 (file)
@@ -83,7 +83,12 @@ namespace ILCompiler.DependencyAnalysis
                 }
             }
 
+            // needed to construct storage
             objData.EmitPointerReloc(factory.TypeManagerIndirection);
+
+            // tls storage ID for uninlined types. used to:
+            // - get the type from the type manager
+            // - get the slot from the per-type storage array
             objData.EmitNaturalInt(typeTlsIndex);
 
             return objData.ToObjectData();
index ff83282..fb5b514 100644 (file)
     <Compile Include="Compiler\DependencyAnalysis\ReadyToRunHeaderNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\ModulesSectionNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\TypeManagerIndirectionNode.cs" />
+    <Compile Include="Compiler\DependencyAnalysis\TlsRootNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\BlobNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\EETypeNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\CanonicalDefinitionEETypeNode.cs" />
index ae5e9f1..6b71f44 100644 (file)
@@ -491,10 +491,9 @@ namespace ILCompiler
                     builder.UsePreinitializationManager(preinitManager);
                 }
 
-                // If we have a scanner, we can inline threadstatics storage using the information
-                // we collected at scanning time.
-                // Inlined storage implies a single type manager, thus we do not do it in multifile case.
-                if (!multiFile && !Get(_command.NoInlineTls))
+                // If we have a scanner, we can inline threadstatics storage using the information we collected at scanning time.
+                if (!Get(_command.NoInlineTls) &&
+                    (targetOS == TargetOS.Linux || (targetArchitecture == TargetArchitecture.X64 && targetOS == TargetOS.Windows)))
                 {
                     builder.UseInlinedThreadStatics(scanResults.GetInlinedThreadStatics());
                 }