From 7aa3cad46ac3258f9b069fbdaad8b73e280ebfbb Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Mon, 15 Mar 2021 14:27:06 -0400 Subject: [PATCH] [NVPTX] Enable lowering of atomics on local memory LLVM does not have valid assembly backends for atomicrmw on local memory. However, as this memory is thread local, we should be able to lower this to the relevant load/store. Differential Revision: https://reviews.llvm.org/D98650 --- llvm/include/llvm/Transforms/Scalar/LowerAtomic.h | 6 ++ llvm/lib/Target/NVPTX/CMakeLists.txt | 1 + llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp | 70 +++++++++++++++++++++++ llvm/lib/Target/NVPTX/NVPTXAtomicLower.h | 22 +++++++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4 ++ llvm/lib/Transforms/Scalar/LowerAtomic.cpp | 4 +- llvm/test/CodeGen/NVPTX/atomic-lower-local.ll | 18 ++++++ 7 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp create mode 100644 llvm/lib/Target/NVPTX/NVPTXAtomicLower.h create mode 100644 llvm/test/CodeGen/NVPTX/atomic-lower-local.ll diff --git a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h b/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h index 1d55508..87d945d 100644 --- a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h +++ b/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h @@ -24,6 +24,12 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &); static bool isRequired() { return true; } }; + +class AtomicRMWInst; +/// Convert the given RMWI into primitive load and stores, +/// assuming that doing so is legal. Return true if the lowering +/// succeeds. 
+bool lowerAtomicRMWInst(AtomicRMWInst *RMWI); } #endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt index 6a678ec5..4db593b 100644 --- a/llvm/lib/Target/NVPTX/CMakeLists.txt +++ b/llvm/lib/Target/NVPTX/CMakeLists.txt @@ -12,6 +12,7 @@ add_public_tablegen_target(NVPTXCommonTableGen) set(NVPTXCodeGen_sources NVPTXAllocaHoisting.cpp + NVPTXAtomicLower.cpp NVPTXAsmPrinter.cpp NVPTXAssignValidGlobalNames.cpp NVPTXFrameLowering.cpp diff --git a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp new file mode 100644 index 0000000..10bf56f --- /dev/null +++ b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp @@ -0,0 +1,70 @@ +//===-- NVPTXAtomicLower.cpp - Lower atomics of local memory ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Lower atomics of local memory to simple load/stores +// +//===----------------------------------------------------------------------===// + +#include "NVPTXAtomicLower.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Transforms/Scalar/LowerAtomic.h" + +#include "MCTargetDesc/NVPTXBaseInfo.h" +using namespace llvm; + +namespace { +// Lowers atomicrmw instructions that address the local address space to +// plain (non-atomic) loads and stores. 
+class NVPTXAtomicLower : public FunctionPass {
+public:
+  static char ID; // Pass ID
+  NVPTXAtomicLower() : FunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+
+  StringRef getPassName() const override {
+    return "NVPTX lower atomics of local memory";
+  }
+
+  bool runOnFunction(Function &F) override; // true if anything was lowered
+};
+} // namespace
+
+bool NVPTXAtomicLower::runOnFunction(Function &F) {
+  SmallVector<AtomicRMWInst *> LocalMemoryAtomics; // lowered after the scan
+  for (Instruction &I : instructions(F))
+    if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
+      if (RMWI->getPointerAddressSpace() == ADDRESS_SPACE_LOCAL)
+        LocalMemoryAtomics.push_back(RMWI);
+
+  bool Changed = false;
+  for (AtomicRMWInst *RMWI : LocalMemoryAtomics)
+    Changed |= lowerAtomicRMWInst(RMWI);
+  return Changed;
+}
+
+char NVPTXAtomicLower::ID = 0;
+
+namespace llvm {
+void initializeNVPTXAtomicLowerPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(NVPTXAtomicLower, "nvptx-atomic-lower",
+                "Lower atomics of local memory to simple load/stores", false,
+                false)
+
+FunctionPass *llvm::createNVPTXAtomicLowerPass() {
+  return new NVPTXAtomicLower();
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.h b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.h
new file mode 100644
index 0000000..faf5765
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.h
@@ -0,0 +1,22 @@
+//===-- NVPTXAtomicLower.h - Lower atomics of local memory ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Lower atomics of local memory to simple load/stores +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXATOMICLOWER_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXATOMICLOWER_H + +namespace llvm { +class FunctionPass; + +extern FunctionPass *createNVPTXAtomicLowerPass(); +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 3a31b3b..301a410 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -13,6 +13,7 @@ #include "NVPTXTargetMachine.h" #include "NVPTX.h" #include "NVPTXAllocaHoisting.h" +#include "NVPTXAtomicLower.h" #include "NVPTXLowerAggrCopies.h" #include "NVPTXTargetObjectFile.h" #include "NVPTXTargetTransformInfo.h" @@ -65,6 +66,7 @@ void initializeNVVMIntrRangePass(PassRegistry&); void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAllocaHoistingPass(PassRegistry &); +void initializeNVPTXAtomicLowerPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); void initializeNVPTXLowerArgsPass(PassRegistry &); @@ -86,6 +88,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() { initializeGenericToNVVMPass(PR); initializeNVPTXAllocaHoistingPass(PR); initializeNVPTXAssignValidGlobalNamesPass(PR); + initializeNVPTXAtomicLowerPass(PR); initializeNVPTXLowerArgsPass(PR); initializeNVPTXLowerAllocaPass(PR); initializeNVPTXLowerAggrCopiesPass(PR); @@ -252,6 +255,7 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() { addPass(createSROAPass()); addPass(createNVPTXLowerAllocaPass()); addPass(createInferAddressSpacesPass()); + 
addPass(createNVPTXAtomicLowerPass()); } void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() { diff --git a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/llvm/lib/Transforms/Scalar/LowerAtomic.cpp index d1f67b3..4063e4f 100644 --- a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/llvm/lib/Transforms/Scalar/LowerAtomic.cpp @@ -40,7 +40,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { return true; } -static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) { +bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) { IRBuilder<> Builder(RMWI); Value *Ptr = RMWI->getPointerOperand(); Value *Val = RMWI->getValOperand(); @@ -123,7 +123,7 @@ static bool runOnBasicBlock(BasicBlock &BB) { else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&Inst)) Changed |= LowerAtomicCmpXchgInst(CXI); else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&Inst)) - Changed |= LowerAtomicRMWInst(RMWI); + Changed |= lowerAtomicRMWInst(RMWI); else if (LoadInst *LI = dyn_cast<LoadInst>(&Inst)) { if (LI->isAtomic()) LowerLoadInst(LI); diff --git a/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll b/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll new file mode 100644 index 0000000..a041dcb --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -S -nvptx-atomic-lower | FileCheck %s + +; This test ensures that there is a legal way for ptx to lower atomics +; on local memory. Here, we demonstrate this by lowering them to simple +; load and stores. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target triple = "nvptx64-unknown-unknown" + +define double @kernel(double addrspace(5)* %ptr, double %val) { + %res = atomicrmw fadd double addrspace(5)* %ptr, double %val monotonic, align 8 + ret double %res +; CHECK: %1 = load double, double addrspace(5)* %ptr, align 8 +; CHECK-NEXT: %2 = fadd double %1, %val +; CHECK-NEXT: store double %2, double addrspace(5)* %ptr, align 8 +; CHECK-NEXT: ret double %1 +} + -- 2.7.4