From af54d1e852850edcc7b9485851320d9ebf1be4fe Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6@vols.utk.edu>
Date: Thu, 23 Mar 2023 14:15:01 -0500
Subject: [PATCH] [NVPTX] Set the atomic inling threshold when targeting NVPTX
 directly

Since Clang 16.0.0 users can target the `NVPTX` architecture directly
via `--target=nvptx64-nvidia-cuda`. However, this does not set the
atomic inlining size correctly. This leads to spurious warnings and
emission of runtime atomics that are never implemented. This patch
ensures that we set this to the appropriate pointer width. This will
always be 64 in the future as `nvptx64` will only be supported moving
forward.

Fixes: https://github.com/llvm/llvm-project/issues/61410

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D146750
---
 clang/lib/Basic/Targets/NVPTX.cpp     |  2 ++
 clang/test/CodeGen/atomics-inlining.c | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index bacd93e..aca51b2 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -93,6 +93,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
     default:
       llvm_unreachable("TargetPointerWidth must be 32 or 64");
     }
+
+    MaxAtomicInlineWidth = TargetPointerWidth;
     return;
   }
 
diff --git a/clang/test/CodeGen/atomics-inlining.c b/clang/test/CodeGen/atomics-inlining.c
index ade0e3d..862c630 100644
--- a/clang/test/CodeGen/atomics-inlining.c
+++ b/clang/test/CodeGen/atomics-inlining.c
@@ -8,6 +8,7 @@
 // RUN: %clang_cc1 -triple mipsisa64r6el-linux-gnuabi64 -emit-llvm %s -o - | FileCheck %s -check-prefix=MIPS64
 // RUN: %clang_cc1 -triple sparc-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV8 -check-prefix=SPARC
 // RUN: %clang_cc1 -triple sparcv9-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV9 -check-prefix=SPARC
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s -check-prefix=NVPTX
 
 unsigned char c1, c2;
 unsigned short s1, s2;
@@ -109,4 +110,17 @@ void test1(void) {
 // SPARCV9: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // SPARCV8: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 // SPARCV8: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
+
+// NVPTX-LABEL: define{{.*}} void @test1
+// NVPTX: = load atomic i8, ptr @c1 seq_cst, align 1
+// NVPTX: store atomic i8 {{.*}}, ptr @c1 seq_cst, align 1
+// NVPTX: = load atomic i16, ptr @s1 seq_cst, align 2
+// NVPTX: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
+// NVPTX: = load atomic i32, ptr @i1 seq_cst, align 4
+// NVPTX: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
+// NVPTX: = load atomic i64, ptr @ll1 seq_cst, align 8
+// NVPTX: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
+// NVPTX: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
+// NVPTX: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
+
 }
-- 
2.7.4