[NVPTX] Set default version of architecture to SM_30, PTX to 6.0.

author Pavel Kopyl <pavelkopyl@gmail.com>

Mon, 9 Jan 2023 14:55:55 +0000 (17:55 +0300)

committer Andrew Savonichev <andrew.savonichev@gmail.com>

Tue, 10 Jan 2023 12:22:40 +0000 (15:22 +0300)
author Pavel Kopyl <pavelkopyl@gmail.com>
Mon, 9 Jan 2023 14:55:55 +0000 (17:55 +0300)
committer Andrew Savonichev <andrew.savonichev@gmail.com>
Tue, 10 Jan 2023 12:22:40 +0000 (15:22 +0300)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td

index dcdd128..4d4203c 100644 (file)
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -115,11 +115,11 @@ def PTX78 : SubtargetFeature<"ptx78", "PTXVersion", "78",
  class Proc<string Name, list<SubtargetFeature> Features>
   : Processor<Name, NoItineraries, Features>;
  
-def : Proc<"sm_20", [SM20]>;
-def : Proc<"sm_21", [SM21]>;
+def : Proc<"sm_20", [SM20, PTX32]>;
+def : Proc<"sm_21", [SM21, PTX32]>;
  def : Proc<"sm_30", [SM30]>;
  def : Proc<"sm_32", [SM32, PTX40]>;
-def : Proc<"sm_35", [SM35]>;
+def : Proc<"sm_35", [SM35, PTX32]>;
  def : Proc<"sm_37", [SM37, PTX41]>;
  def : Proc<"sm_50", [SM50, PTX40]>;
  def : Proc<"sm_52", [SM52, PTX41]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp

index a03492a..2347f46 100644 (file)
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -33,13 +33,13 @@ void NVPTXSubtarget::anchor() {}
  NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                                  StringRef FS) {
      // Provide the default CPU if we don't have one.
-    TargetName = std::string(CPU.empty() ? "sm_20" : CPU);
+    TargetName = std::string(CPU.empty() ? "sm_30" : CPU);
  
      ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
  
-    // Set default to PTX 3.2 (CUDA 5.5)
+    // Set default to PTX 6.0 (CUDA 9.0)
      if (PTXVersion == 0) {
-      PTXVersion = 32;
+      PTXVersion = 60;
    }
  
    return *this;
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll

index c4be3b0..e8c554c 100644 (file)
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
@@ -1,7 +1,7 @@
  ; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
  ; Verify that __nvvm_reflect() is replaced with an appropriate value.
  ;
-; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 \
+; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
  ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM20
  ; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
  ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM35
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll

index 7a5a5a7..2c9ea47 100644 (file)
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
@@ -1,6 +1,6 @@
  ; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
  ;
-; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 \
+; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
  ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM20
  ; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
  ; RUN:   | FileCheck %s --check-prefixes=COMMON,SM35
diff --git a/llvm/test/CodeGen/NVPTX/sm-version.ll b/llvm/test/CodeGen/NVPTX/sm-version.ll

index 7cb56a8..eacd7f3 100644 (file)
--- a/llvm/test/CodeGen/NVPTX/sm-version.ll
+++ b/llvm/test/CodeGen/NVPTX/sm-version.ll
@@ -32,7 +32,9 @@
  ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefix=SM80
  ; RUN: llc < %s -march=nvptx64 -mcpu=sm_86 | FileCheck %s --check-prefix=SM86
  
-; SM30: .version 3.2
+; SM20: .version 3.2
+; SM21: .version 3.2
+; SM30: .version 6.0
  ; SM32: .version 4.0
  ; SM35: .version 3.2
  ; SM37: .version 4.1
diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py

index c0b40f1..4e239ae 100644 (file)
--- a/llvm/test/CodeGen/NVPTX/surf-tex.py
+++ b/llvm/test/CodeGen/NVPTX/surf-tex.py
@@ -1,6 +1,6 @@
  # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
-# RUN: llc %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
-# RUN: %if ptxas %{ llc %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
+# RUN: llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
+# RUN: %if ptxas %{ llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
  
  # We only need to run this second time for texture tests, because
  # there is a difference between unified and non-unified intrinsics.
diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll

index b5c13da..65feae8 100644 (file)
--- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
+++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
@@ -1,7 +1,7 @@
  ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda | FileCheck %s
  ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda | %ptxas-verify %}
  
-; CHECK: .target sm_20, debug
+; CHECK: .target sm_30, debug
  
  ; CHECK: .visible .func use_dbg_declare()
  ; CHECK: .local .align 8 .b8 __local_depot0[8];
author	Pavel Kopyl <pavelkopyl@gmail.com>
	Mon, 9 Jan 2023 14:55:55 +0000 (17:55 +0300)
committer	Andrew Savonichev <andrew.savonichev@gmail.com>
	Tue, 10 Jan 2023 12:22:40 +0000 (15:22 +0300)
llvm/lib/Target/NVPTX/NVPTX.td		patch | blob | history
llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp		patch | blob | history
llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll		patch | blob | history
llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll		patch | blob | history
llvm/test/CodeGen/NVPTX/sm-version.ll		patch | blob | history
llvm/test/CodeGen/NVPTX/surf-tex.py		patch | blob | history
llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll		patch | blob | history