add_public_tablegen_target(NVPTXCommonTableGen)
set(NVPTXCodeGen_sources
+ NVPTXAliasAnalysis.cpp
NVPTXAllocaHoisting.cpp
NVPTXAtomicLower.cpp
NVPTXAsmPrinter.cpp
--- /dev/null
+//===--------------------- NVPTXAliasAnalysis.cpp -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the NVPTX address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAliasAnalysis.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "NVPTX.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "NVPTX-aa"
+
+AnalysisKey NVPTXAA::Key; // Storage for the static key declared in class NVPTXAA.
+
+char NVPTXAAWrapperPass::ID = 0; // Storage for the wrapper pass's static ID member.
+char NVPTXExternalAAWrapper::ID = 0; // Storage for the external wrapper's static ID member.
+
+INITIALIZE_PASS(NVPTXAAWrapperPass, "nvptx-aa",
+ "NVPTX Address space based Alias Analysis", false, true)
+
+INITIALIZE_PASS(NVPTXExternalAAWrapper, "nvptx-aa-wrapper",
+ "NVPTX Address space based Alias Analysis Wrapper", false, true)
+
+/// Factory for the wrapper pass that provides the NVPTXAAResult.
+///
+/// \returns a newly heap-allocated NVPTXAAWrapperPass.
+ImmutablePass *llvm::createNVPTXAAWrapperPass() {
+  ImmutablePass *AAWrapper = new NVPTXAAWrapperPass();
+  return AAWrapper;
+}
+
+/// Factory for the external-AA wrapper that carries the NVPTX callback.
+///
+/// \returns a newly heap-allocated NVPTXExternalAAWrapper.
+ImmutablePass *llvm::createNVPTXExternalAAWrapperPass() {
+  ImmutablePass *ExternalWrapper = new NVPTXExternalAAWrapper();
+  return ExternalWrapper;
+}
+
+/// Construct the wrapper pass and run its initializer against the global
+/// PassRegistry.
+NVPTXAAWrapperPass::NVPTXAAWrapperPass() : ImmutablePass(ID) {
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeNVPTXAAWrapperPassPass(Registry);
+}
+
+void NVPTXAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll(); // Declare that this pass preserves every other analysis.
+}
+
+/// Decide how two pointers relate based solely on their address spaces.
+///
+/// \returns MayAlias when either address space is generic or when both are the
+/// same, and NoAlias when they are two different non-generic address spaces.
+static AliasResult::Kind getAliasResult(unsigned AS1, unsigned AS2) {
+  const bool InvolvesGeneric =
+      AS1 == ADDRESS_SPACE_GENERIC || AS2 == ADDRESS_SPACE_GENERIC;
+
+  // PTX s6.4.1.1. Generic Addressing:
+  // A generic address maps to global memory unless it falls within
+  // the window for const, local, or shared memory. The Kernel
+  // Function Parameters (.param) window is contained within the
+  // .global window.
+  //
+  // Consequently, on some GPUs a global pointer may alias a param pointer
+  // through addrspacecast(param->generic->global) when the cvta.param
+  // instruction is used (PTX 7.7+ and SM_70+).
+  //
+  // TODO: cvta.param is not yet supported. The aliasing rules below need to
+  // change once it is added.
+  if (InvolvesGeneric || AS1 == AS2)
+    return AliasResult::MayAlias;
+
+  return AliasResult::NoAlias;
+}
+
+/// Answer an alias query using only the address spaces of the two pointers.
+/// The query info and the context instruction are not consulted.
+AliasResult NVPTXAAResult::alias(const MemoryLocation &Loc1,
+                                 const MemoryLocation &Loc2, AAQueryInfo &AAQI,
+                                 const Instruction *) {
+  auto AddrSpaceOf = [](const MemoryLocation &Loc) {
+    return Loc.Ptr->getType()->getPointerAddressSpace();
+  };
+
+  return getAliasResult(AddrSpaceOf(Loc1), AddrSpaceOf(Loc2));
+}
+
+/// \returns true when \p AS is the const or the param address space.
+///
+// TODO: the .param address space may become writable once cvta.param is
+// supported; that instruction is currently not supported. NVPTXLowerArgs also
+// does not allow any writes to .param pointers.
+static bool isConstOrParam(unsigned AS) {
+  switch (AS) {
+  case AddressSpace::ADDRESS_SPACE_CONST:
+  case AddressSpace::ADDRESS_SPACE_PARAM:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Compute the mod/ref mask for \p Loc.
+///
+/// A location is reported as NoModRef when the const or param address space
+/// appears either on the pointer itself or on its underlying object. Every
+/// other location is deferred to AAResultBase.
+ModRefInfo NVPTXAAResult::getModRefInfoMask(const MemoryLocation &Loc,
+                                            AAQueryInfo &AAQI,
+                                            bool IgnoreLocals) {
+  auto InConstOrParamSpace = [](const Value *V) {
+    return isConstOrParam(V->getType()->getPointerAddressSpace());
+  };
+
+  // The pointer is checked first; the underlying object is only looked up
+  // when the pointer's own address space does not settle the question.
+  if (InConstOrParamSpace(Loc.Ptr) ||
+      InConstOrParamSpace(getUnderlyingObject(Loc.Ptr)))
+    return ModRefInfo::NoModRef;
+
+  return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+}
--- /dev/null
+//===-------------------- NVPTXAliasAnalysis.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the NVPTX address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+
+namespace llvm {
+
+class MemoryLocation;
+
+/// Alias analysis result for NVPTX. The object holds no data members of its
+/// own; it only overrides the queries declared below.
+class NVPTXAAResult : public AAResultBase {
+public:
+  NVPTXAAResult() = default;
+  NVPTXAAResult(NVPTXAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
+
+  /// Handle invalidation events from the new pass manager.
+  ///
+  /// By definition, this result is stateless and so remains valid.
+  ///
+  /// \returns false unconditionally.
+  bool invalidate(Function &, const PreservedAnalyses &,
+                  FunctionAnalysisManager::Invalidator &Inv) {
+    return false;
+  }
+
+  /// Alias query for the memory locations \p LocA and \p LocB.
+  ///
+  /// \param AAQI per-query state supplied by the alias analysis framework.
+  /// \param CtxI optional context instruction; defaults to nullptr.
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI, const Instruction *CtxI = nullptr);
+
+  /// Mod/ref mask query for the memory location \p Loc.
+  ///
+  /// \param AAQI per-query state supplied by the alias analysis framework.
+  /// \param IgnoreLocals forwarded to the base implementation when this class
+  /// has no answer of its own.
+  ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+                               bool IgnoreLocals);
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result (an NVPTXAAResult).
+class NVPTXAA : public AnalysisInfoMixin<NVPTXAA> {
+ friend AnalysisInfoMixin<NVPTXAA>; // presumably lets the mixin reach the private Key below — TODO confirm
+
+ static AnalysisKey Key; // Defined out of line in NVPTXAliasAnalysis.cpp.
+
+public:
+ using Result = NVPTXAAResult;
+
+ NVPTXAAResult run(Function &F, AnalysisManager<Function> &AM) { // Neither F nor AM is used.
+ return NVPTXAAResult(); // A default-constructed result is returned for every function.
+ }
+};
+
+/// Legacy wrapper pass to provide the NVPTXAAResult object.
+///
+/// The result object is created in doInitialization() and destroyed in
+/// doFinalization(). getResult() dereferences it unconditionally, so it must
+/// only be called between those two events.
+class NVPTXAAWrapperPass : public ImmutablePass {
+  /// Owned result; null outside the doInitialization()/doFinalization() window.
+  std::unique_ptr<NVPTXAAResult> Result;
+
+public:
+  static char ID;
+
+  NVPTXAAWrapperPass();
+
+  /// \returns the result created by doInitialization().
+  NVPTXAAResult &getResult() { return *Result; }
+  const NVPTXAAResult &getResult() const { return *Result; }
+
+  /// Create a fresh result object.
+  /// \returns false.
+  bool doInitialization(Module &M) override {
+    Result = std::make_unique<NVPTXAAResult>();
+    return false;
+  }
+
+  /// Release the result object.
+  /// \returns false.
+  bool doFinalization(Module &M) override {
+    Result.reset();
+    return false;
+  }
+
+  /// Declares that this pass preserves all analyses (defined out of line).
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+// Wrapper around ExternalAAWrapperPass so that the default constructor gets
+// the callback, which adds the NVPTXAAWrapperPass result to the given AAResults.
+class NVPTXExternalAAWrapper : public ExternalAAWrapperPass {
+public:
+ static char ID; // Defined out of line in NVPTXAliasAnalysis.cpp.
+
+ NVPTXExternalAAWrapper()
+ : ExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
+ if (auto *WrapperPass =
+ P.getAnalysisIfAvailable<NVPTXAAWrapperPass>()) // Nothing is added when the lookup yields null.
+ AAR.addAAResult(WrapperPass->getResult());
+ }) {}
+};
+
+ImmutablePass *createNVPTXAAWrapperPass(); // Returns a new NVPTXAAWrapperPass.
+void initializeNVPTXAAWrapperPassPass(PassRegistry &); // Called from the NVPTXAAWrapperPass constructor.
+ImmutablePass *createNVPTXExternalAAWrapperPass(); // Returns a new NVPTXExternalAAWrapper.
+void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
#include "NVPTXTargetMachine.h"
#include "NVPTX.h"
+#include "NVPTXAliasAnalysis.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXAtomicLower.h"
#include "NVPTXLowerAggrCopies.h"
void initializeNVPTXProxyRegErasurePass(PassRegistry &);
void initializeNVVMIntrRangePass(PassRegistry &);
void initializeNVVMReflectPass(PassRegistry &);
+void initializeNVPTXAAWrapperPassPass(PassRegistry &);
+void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
} // end namespace llvm
initializeNVPTXLowerAggrCopiesPass(PR);
initializeNVPTXProxyRegErasurePass(PR);
initializeNVPTXDAGToDAGISelPass(PR);
+ initializeNVPTXAAWrapperPassPass(PR);
+ initializeNVPTXExternalAAWrapperPass(PR);
}
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
F, STI);
}
+void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
+ AAM.registerFunctionAnalysis<NVPTXAA>(); // Register NVPTXAA as a function-level analysis with the AA manager.
+}
+
void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineParsingCallback(
[](StringRef PassName, FunctionPassManager &PM,
return false;
});
+ PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &FAM) {
+ FAM.registerPass([&] { return NVPTXAA(); });
+ });
+
+ PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
+ if (AAName == "nvptx-aa") {
+ AAM.registerFunctionAnalysis<NVPTXAA>();
+ return true;
+ }
+ return false;
+ });
+
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
disablePass(&PatchableFunctionID);
disablePass(&ShrinkWrapID);
+ addPass(createNVPTXAAWrapperPass());
+ addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<NVPTXAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ }));
+
// NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
// it here does nothing. But since we need it for correctness when lowering
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const override;
+ void registerDefaultAliasAnalyses(AAManager &AAM) override;
+
void registerPassBuilderCallbacks(PassBuilder &PB) override;
TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
--- /dev/null
+; RUN: opt -passes=aa-eval -aa-pipeline=nvptx-aa -print-all-alias-modref-info < %s -S 2>&1 \
+; RUN: | FileCheck %s --check-prefixes CHECK-ALIAS
+;
+; RUN: opt -aa-pipeline=nvptx-aa -passes=licm < %s -S | FileCheck %s --check-prefixes CHECK-AA-CONST
+; RUN: opt -aa-pipeline=basic-aa -passes=licm < %s -S | FileCheck %s --check-prefixes CHECK-NOAA-CONST
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK-ALIAS-LABEL: Function: test
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(1)* %global
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(3)* %shared
+; CHECK-ALIAS: NoAlias: i8 addrspace(1)* %global, i8 addrspace(3)* %shared
+; CHECK-ALIAS: MayAlias: i8 addrspace(4)* %const, i8* %gen
+; CHECK-ALIAS: NoAlias: i8 addrspace(4)* %const, i8 addrspace(1)* %global
+; CHECK-ALIAS: NoAlias: i8 addrspace(4)* %const, i8 addrspace(3)* %shared
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(5)* %local
+; CHECK-ALIAS: NoAlias: i8 addrspace(1)* %global, i8 addrspace(5)* %local
+; CHECK-ALIAS: NoAlias: i8 addrspace(5)* %local, i8 addrspace(3)* %shared
+; CHECK-ALIAS: NoAlias: i8 addrspace(4)* %const, i8 addrspace(5)* %local
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(101)* %param
+; CHECK-ALIAS: NoAlias: i8 addrspace(1)* %global, i8 addrspace(101)* %param
+; CHECK-ALIAS: NoAlias: i8 addrspace(101)* %param, i8 addrspace(3)* %shared
+; CHECK-ALIAS: NoAlias: i8 addrspace(4)* %const, i8 addrspace(101)* %param
+; CHECK-ALIAS: NoAlias: i8 addrspace(5)* %local, i8 addrspace(101)* %param
+
+define i8 @test_alias(ptr %gen, ptr addrspace(1) %global, ptr addrspace(3) %shared, ptr addrspace(4) %const, ptr addrspace(5) %local) {
+ %param = addrspacecast ptr %gen to ptr addrspace(101) ; address space 101 is not among the arguments, so a pointer in it is created by a cast
+ %v1 = load i8, ptr %gen ; one load through each pointer named in the CHECK-ALIAS lines
+ %v2 = load i8, ptr addrspace(1) %global
+ %v3 = load i8, ptr addrspace(3) %shared
+ %v4 = load i8, ptr addrspace(4) %const
+ %v5 = load i8, ptr addrspace(5) %local
+ %v6 = load i8, ptr addrspace(101) %param
+ %res1 = add i8 %v1, %v2 ; the adds chain every loaded value into the return value
+ %res2 = add i8 %res1, %v3
+ %res3 = add i8 %res2, %v4
+ %res4 = add i8 %res3, %v5
+ %res5 = add i8 %res4, %v6
+ ret i8 %res5
+}
+
+; CHECK-ALIAS-LABEL: Function: test_const
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(1)* %global
+; CHECK-ALIAS: NoAlias: i8 addrspace(4)* %const, i8 addrspace(1)* %global
+; CHECK-ALIAS: MayAlias: i8 addrspace(4)* %const, i8* %gen
+;
+define i8 @test_const(ptr %gen, ptr addrspace(1) %global, ptr addrspace(4) %const) {
+;
+; Even though %gen and %const may alias and there is a store to %gen,
+; LICM should be able to hoist %load_const out of the loop because it is
+; known to read constant memory (AA::pointsToConstantMemory()).
+;
+; CHECK-AA-CONST-LABEL: @test_const
+; CHECK-AA-CONST-LABEL: entry
+; CHECK-AA-CONST: %[[LOAD_CONST:.+]] = load i8, ptr addrspace(4)
+; CHECK-AA-CONST-LABEL: loop
+; CHECK-AA-CONST: add {{.*}}%[[LOAD_CONST]]
+;
+; Without NVPTX AA the load is left in the loop because we assume that
+; it may be clobbered by the store.
+;
+; CHECK-NOAA-CONST-LABEL: @test_const
+; CHECK-NOAA-CONST-LABEL: loop
+; CHECK-NOAA-CONST: %[[LOAD_CONST:.+]] = load i8, ptr addrspace(4)
+; CHECK-NOAA-CONST: add {{.*}}%[[LOAD_CONST]]
+entry:
+ br label %loop
+loop:
+ %v = phi i8 [0, %entry], [%v2, %loop]
+ %load_global = load i8, ptr addrspace(1) %global
+ store i8 %load_global, ptr %gen ; store through the generic pointer, which is reported as MayAlias with %const
+ %load_const = load i8, ptr addrspace(4) %const ; the load that the CHECK-AA-CONST lines expect in the entry block
+ %v2 = add i8 %v, %load_const
+ %cond = icmp eq i8 %load_const, 0
+ br i1 %cond, label %done, label %loop
+done:
+ ret i8 %v2
+}
+
+; Same as @test_const above, but for param space.
+;
+; CHECK-ALIAS-LABEL: Function: test_param
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(1)* %global
+; CHECK-ALIAS: NoAlias: i8 addrspace(1)* %global, i8 addrspace(101)* %param
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(101)* %param
+;
+define i8 @test_param(ptr %gen, ptr addrspace(1) %global, ptr %param_gen) {
+;
+; CHECK-AA-CONST-LABEL: @test_param
+; CHECK-AA-CONST-LABEL: entry
+; CHECK-AA-CONST: %[[LOAD_PARAM:.+]] = load i8, ptr addrspace(101)
+; CHECK-AA-CONST-LABEL: loop
+; CHECK-AA-CONST: add {{.*}}%[[LOAD_PARAM]]
+;
+; CHECK-NOAA-CONST-LABEL: @test_param
+; CHECK-NOAA-CONST-LABEL: loop
+; CHECK-NOAA-CONST: %[[LOAD_PARAM:.+]] = load i8, ptr addrspace(101)
+; CHECK-NOAA-CONST: add {{.*}}%[[LOAD_PARAM]]
+entry:
+ %param = addrspacecast ptr %param_gen to ptr addrspace(101) ; the param-space pointer is derived from a generic argument
+ br label %loop
+loop:
+ %v = phi i8 [0, %entry], [%v2, %loop]
+ %load_global = load i8, ptr addrspace(1) %global
+ store i8 %load_global, ptr %gen ; store through the generic pointer, which is reported as MayAlias with %param
+ %load_const = load i8, ptr addrspace(101) %param ; despite its name, this value is loaded from %param (address space 101)
+ %v2 = add i8 %v, %load_const
+ %cond = icmp eq i8 %load_const, 0
+ br i1 %cond, label %done, label %loop
+done:
+ ret i8 %v2
+}