From d6775052de6b7aaeaef17fdac1c796c2b99a91ee Mon Sep 17 00:00:00 2001 From: Pavel Kopyl Date: Mon, 9 Jan 2023 17:55:55 +0300 Subject: [PATCH] [NVPTX] Set default version of architecture to SM_30, PTX to 6.0. Support of variadic functions triggers an assertion on several tests from llvm/test/CodeGen/Generic/ if nvptx64-* is specified as the default triple: Support for variadic functions (unsized array parameter) introduced in PTX ISA version 6.0 and requires target sm_30. That happens because those tests contain variadic function calls and the default versions of both the PTX ISA (3.2) and the architecture (sm_20) are below the minimum required. There were no observable problems with these tests before support for variadic functions was added, because the NVPTX backend simply did not handle them properly and generated invalid PTX code. Differential Revision: https://reviews.llvm.org/D141054 --- llvm/lib/Target/NVPTX/NVPTX.td | 6 +++--- llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 6 +++--- llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll | 2 +- llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll | 2 +- llvm/test/CodeGen/NVPTX/sm-version.ll | 4 +++- llvm/test/CodeGen/NVPTX/surf-tex.py | 4 ++-- llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll | 2 +- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index dcdd1286..4d4203c 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -115,11 +115,11 @@ def PTX78 : SubtargetFeature<"ptx78", "PTXVersion", "78", class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_20", [SM20]>; -def : Proc<"sm_21", [SM21]>; +def : Proc<"sm_20", [SM20, PTX32]>; +def : Proc<"sm_21", [SM21, PTX32]>; def : Proc<"sm_30", [SM30]>; def : Proc<"sm_32", [SM32, PTX40]>; -def : Proc<"sm_35", [SM35]>; +def : Proc<"sm_35", [SM35, PTX32]>; def : Proc<"sm_37", [SM37, PTX41]>; def : Proc<"sm_50", [SM50, PTX40]>; def : Proc<"sm_52", [SM52, PTX41]>; diff --git 
a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index a03492a..2347f46 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -33,13 +33,13 @@ void NVPTXSubtarget::anchor() {} NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { // Provide the default CPU if we don't have one. - TargetName = std::string(CPU.empty() ? "sm_20" : CPU); + TargetName = std::string(CPU.empty() ? "sm_30" : CPU); ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS); - // Set default to PTX 3.2 (CUDA 5.5) + // Set default to PTX 6.0 (CUDA 9.0) if (PTXVersion == 0) { - PTXVersion = 32; + PTXVersion = 60; } return *this; diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll index c4be3b0..e8c554c 100644 --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll @@ -1,7 +1,7 @@ ; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type. ; Verify that __nvvm_reflect() is replaced with an appropriate value. 
; -; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 \ +; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM20 ; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM35 diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll index 7a5a5a7..2c9ea47 100644 --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll @@ -1,6 +1,6 @@ ; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value ; -; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 \ +; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM20 ; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM35 diff --git a/llvm/test/CodeGen/NVPTX/sm-version.ll b/llvm/test/CodeGen/NVPTX/sm-version.ll index 7cb56a8..eacd7f3 100644 --- a/llvm/test/CodeGen/NVPTX/sm-version.ll +++ b/llvm/test/CodeGen/NVPTX/sm-version.ll @@ -32,7 +32,9 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefix=SM80 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_86 | FileCheck %s --check-prefix=SM86 -; SM30: .version 3.2 +; SM20: .version 3.2 +; SM21: .version 3.2 +; SM30: .version 6.0 ; SM32: .version 4.0 ; SM35: .version 3.2 ; SM37: .version 4.1 diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py index c0b40f1..4e239ae 100644 --- a/llvm/test/CodeGen/NVPTX/surf-tex.py +++ b/llvm/test/CodeGen/NVPTX/surf-tex.py @@ -1,6 +1,6 @@ # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll -# RUN: llc %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll -# RUN: %if ptxas %{ llc %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %} +# RUN: llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll +# RUN: %if 
ptxas %{ llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %} # We only need to run this second time for texture tests, because # there is a difference between unified and non-unified intrinsics. diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll index b5c13da..65feae8 100644 --- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll +++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda | %ptxas-verify %} -; CHECK: .target sm_20, debug +; CHECK: .target sm_30, debug ; CHECK: .visible .func use_dbg_declare() ; CHECK: .local .align 8 .b8 __local_depot0[8]; -- 2.7.4