let builders = [
OpBuilder<"Builder *builder, OperationState &result, StringRef name, "
- "FunctionType type, ArrayRef<Type> workgroupAttributions, "
- "ArrayRef<Type> privateAttributions, "
- "ArrayRef<NamedAttribute> attrs">
+ "FunctionType type, ArrayRef<Type> workgroupAttributions = {}, "
+ "ArrayRef<Type> privateAttributions = {}, "
+ "ArrayRef<NamedAttribute> attrs = {}">
];
let extraClassDeclaration = [{
return getTypeAttr().getValue().cast<FunctionType>();
}
+ /// Change the type of this function in place. This is an extremely
+ /// dangerous operation and it is up to the caller to ensure that this is
+ /// legal for this function, and to restore invariants:
+ /// - the entry block args must be updated to match the function params.
+ /// - the argument/result attributes may need an update: if the new type
+ /// has less parameters we drop the extra attributes, if there are more
+ /// parameters they won't have any attributes.
+ // TODO(b/146349912): consider removing this function thanks to rewrite
+ // patterns.
+ void setType(FunctionType newType);
+
/// Returns the number of buffers located in the workgroup memory.
unsigned getNumWorkgroupAttributions() {
return getAttrOfType<IntegerAttr>(getNumWorkgroupAttributionsAttrName())
let skipDefaultBuilders = 1;
let builders = [
- OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
+ OpBuilder<"Builder *builder, OperationState &result, GPUFuncOp kernelFunc, "
"Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ, "
"Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ, "
"ValueRange kernelOperands">,
- OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
+ OpBuilder<"Builder *builder, OperationState &result, GPUFuncOp kernelFunc, "
"KernelDim3 gridSize, KernelDim3 blockSize, "
"ValueRange kernelOperands">
];
EntryPointABIAttr getEntryPointABIAttr(ArrayRef<int32_t> localSize,
MLIRContext *context);
-/// Legalizes a function as an entry function.
-FuncOp lowerAsEntryFunction(FuncOp funcOp, SPIRVTypeConverter &typeConverter,
- ConversionPatternRewriter &rewriter,
- spirv::EntryPointABIAttr entryPointInfo,
- ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo);
-
/// Sets the InterfaceVarABIAttr and EntryPointABIAttr for a function and its
/// arguments
LogicalResult setABIAttrs(FuncOp funcOp,
}
// Rewrite the original GPU function to an LLVM function.
- // TODO(zinenko): there is a hack in the std->llvm lowering that promotes
- // structs to pointers that probably needs to be replicated here.
auto funcType = lowering.convertType(gpuFuncOp.getType())
.cast<LLVM::LLVMType>()
.getPointerElementTy();
}
}
+ // Move the region to the new function, update the entry block signature.
rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
llvmFuncOp.end());
rewriter.applySignatureConversion(&llvmFuncOp.getBody(),
signatureConversion);
+ {
+ // For memref-typed arguments, insert the relevant loads in the beginning
+ // of the block to comply with the LLVM dialect calling convention. This
+ // needs to be done after signature conversion to get the right types.
+ OpBuilder::InsertionGuard guard(rewriter);
+ Block &block = llvmFuncOp.front();
+ rewriter.setInsertionPointToStart(&block);
+
+ for (auto en : llvm::enumerate(gpuFuncOp.getType().getInputs())) {
+ if (!en.value().isa<MemRefType>() &&
+ !en.value().isa<UnrankedMemRefType>())
+ continue;
+
+ BlockArgument *arg = block.getArgument(en.index());
+ Value *loaded = rewriter.create<LLVM::LoadOp>(loc, arg);
+ rewriter.replaceUsesOfBlockArgument(arg, loaded);
+ }
+ }
+
rewriter.eraseOp(gpuFuncOp);
return matchSuccess();
}
};
+/// Lowers gpu.return inside lowered GPU functions to llvm.return, forwarding
+/// the (already converted) operands unchanged.
+struct GPUReturnOpLowering : public LLVMOpLowering {
+  GPUReturnOpLowering(LLVMTypeConverter &typeConverter)
+      : LLVMOpLowering(gpu::ReturnOp::getOperationName(),
+                       typeConverter.getDialect()->getContext(),
+                       typeConverter) {}
+
+  PatternMatchResult
+  matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    // The empty block list distinguishes a return from a branch-like
+    // terminator: llvm.return has no successors.
+    rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, operands,
+                                                ArrayRef<Block *>());
+    return matchSuccess();
+  }
+};
+
/// Import the GPU Ops to NVVM Patterns.
#include "GPUToNVVM.cpp.inc"
NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
NVVM::GridDimYOp, NVVM::GridDimZOp>,
- GPUAllReduceOpLowering, GPUFuncOpLowering>(converter);
+ GPUAllReduceOpLowering, GPUFuncOpLowering, GPUReturnOpLowering>(
+ converter);
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
"__nv_exp");
}
ConversionPatternRewriter &rewriter) const override;
};
-/// Pattern to convert a kernel function in GPU dialect (a FuncOp with the
-/// attribute gpu.kernel) within a spv.module.
-class KernelFnConversion final : public SPIRVOpLowering<FuncOp> {
+/// Pattern to convert a kernel function in GPU dialect within a spv.module.
+class KernelFnConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> {
public:
KernelFnConversion(MLIRContext *context, SPIRVTypeConverter &converter,
ArrayRef<int64_t> workGroupSize,
PatternBenefit benefit = 1)
- : SPIRVOpLowering<FuncOp>(context, converter, benefit) {
+ : SPIRVOpLowering<gpu::GPUFuncOp>(context, converter, benefit) {
auto config = workGroupSize.take_front(3);
workGroupSizeAsInt32.assign(config.begin(), config.end());
workGroupSizeAsInt32.resize(3, 1);
}
PatternMatchResult
- matchAndRewrite(FuncOp funcOp, ArrayRef<Value *> operands,
+ matchAndRewrite(gpu::GPUFuncOp funcOp, ArrayRef<Value *> operands,
ConversionPatternRewriter &rewriter) const override;
private:
ConversionPatternRewriter &rewriter) const override;
};
+/// Pattern to convert a gpu.return into a SPIR-V return.
+// TODO: This can go to DRR when GPU return has operands.
+class GPUReturnOpConversion final : public SPIRVOpLowering<gpu::ReturnOp> {
+public:
+ using SPIRVOpLowering<gpu::ReturnOp>::SPIRVOpLowering;
+
+ PatternMatchResult
+ matchAndRewrite(gpu::ReturnOp returnOp, ArrayRef<Value *> operands,
+ ConversionPatternRewriter &rewriter) const override;
+};
+
} // namespace
//===----------------------------------------------------------------------===//
}
//===----------------------------------------------------------------------===//
-// FuncOp with gpu.kernel attribute.
+// GPUFuncOp
//===----------------------------------------------------------------------===//
+// Legalizes a GPU function as an entry SPIR-V function: the signature is
+// rewritten to void(void) (arguments are communicated through interface
+// variables per `argABIInfo`), attributes are carried over, and the body is
+// moved into a freshly created std FuncOp. Returns null on failure after
+// emitting a diagnostic.
+static FuncOp
+lowerAsEntryFunction(gpu::GPUFuncOp funcOp, SPIRVTypeConverter &typeConverter,
+                     ConversionPatternRewriter &rewriter,
+                     spirv::EntryPointABIAttr entryPointInfo,
+                     ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
+  auto fnType = funcOp.getType();
+  if (fnType.getNumResults()) {
+    funcOp.emitError("SPIR-V lowering only supports entry functions "
+                     "with no return values right now");
+    return nullptr;
+  }
+  if (fnType.getNumInputs() != argABIInfo.size()) {
+    funcOp.emitError(
+        "lowering as entry functions requires ABI info for all arguments");
+    return nullptr;
+  }
+  // For entry functions need to make the signature void(void). Compute the
+  // replacement value for all arguments and replace all uses.
+  TypeConverter::SignatureConversion signatureConverter(fnType.getNumInputs());
+  {
+    for (auto argType : enumerate(funcOp.getType().getInputs())) {
+      auto convertedType = typeConverter.convertType(argType.value());
+      signatureConverter.addInputs(argType.index(), convertedType);
+    }
+  }
+  auto newFuncOp = rewriter.create<FuncOp>(
+      funcOp.getLoc(), funcOp.getName(),
+      rewriter.getFunctionType(signatureConverter.getConvertedTypes(),
+                               llvm::None),
+      ArrayRef<NamedAttribute>());
+  // Copy all attributes except the type and symbol name, which the new
+  // function op already owns with different values.
+  for (const auto &namedAttr : funcOp.getAttrs()) {
+    if (namedAttr.first.is(impl::getTypeAttrName()) ||
+        namedAttr.first.is(SymbolTable::getSymbolAttrName()))
+      continue;
+    newFuncOp.setAttr(namedAttr.first, namedAttr.second);
+  }
+  rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
+                              newFuncOp.end());
+  rewriter.applySignatureConversion(&newFuncOp.getBody(), signatureConverter);
+  rewriter.eraseOp(funcOp);
+
+  spirv::setABIAttrs(newFuncOp, entryPointInfo, argABIInfo);
+  return newFuncOp;
+}
+
PatternMatchResult
-KernelFnConversion::matchAndRewrite(FuncOp funcOp, ArrayRef<Value *> operands,
+KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp,
+ ArrayRef<Value *> operands,
ConversionPatternRewriter &rewriter) const {
if (!gpu::GPUDialect::isKernel(funcOp)) {
return matchFailure();
auto context = rewriter.getContext();
auto entryPointAttr =
spirv::getEntryPointABIAttr(workGroupSizeAsInt32, context);
- FuncOp newFuncOp = spirv::lowerAsEntryFunction(
- funcOp, typeConverter, rewriter, entryPointAttr, argABI);
+ FuncOp newFuncOp = lowerAsEntryFunction(funcOp, typeConverter, rewriter,
+ entryPointAttr, argABI);
if (!newFuncOp) {
return matchFailure();
}
}
//===----------------------------------------------------------------------===//
+// GPU return inside kernel functions to SPIR-V return.
+//===----------------------------------------------------------------------===//
+
+// Converts gpu.return to spv.Return. Only the value-less form is supported:
+// spv.Return carries no operands, so patterns with operands fail to match.
+PatternMatchResult GPUReturnOpConversion::matchAndRewrite(
+    gpu::ReturnOp returnOp, ArrayRef<Value *> operands,
+    ConversionPatternRewriter &rewriter) const {
+  if (!operands.empty())
+    return matchFailure();
+
+  rewriter.replaceOpWithNewOp<spirv::ReturnOp>(returnOp);
+  return matchSuccess();
+}
+
+//===----------------------------------------------------------------------===//
// GPU To SPIRV Patterns.
//===----------------------------------------------------------------------===//
ArrayRef<int64_t> workGroupSize) {
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
patterns.insert<
- ForOpConversion, KernelModuleConversion, KernelModuleTerminatorConversion,
+ GPUReturnOpConversion, ForOpConversion, KernelModuleConversion,
+ KernelModuleTerminatorConversion,
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
// Check that `launch_func` refers to a well-formed kernel function.
StringRef kernelName = launchOp.kernel();
Operation *kernelFunc = kernelModule.lookupSymbol(kernelName);
- auto kernelStdFunction = dyn_cast_or_null<::mlir::FuncOp>(kernelFunc);
+ auto kernelGPUFunction = dyn_cast_or_null<gpu::GPUFuncOp>(kernelFunc);
auto kernelLLVMFunction = dyn_cast_or_null<LLVM::LLVMFuncOp>(kernelFunc);
- if (!kernelStdFunction && !kernelLLVMFunction)
+ if (!kernelGPUFunction && !kernelLLVMFunction)
return launchOp.emitOpError("kernel function '")
<< kernelName << "' is undefined";
if (!kernelFunc->getAttrOfType<mlir::UnitAttr>(
unsigned actualNumArguments = launchOp.getNumKernelOperands();
unsigned expectedNumArguments = kernelLLVMFunction
? kernelLLVMFunction.getNumArguments()
- : kernelStdFunction.getNumArguments();
+ : kernelGPUFunction.getNumArguments();
if (expectedNumArguments != actualNumArguments)
return launchOp.emitOpError("got ")
<< actualNumArguments << " kernel operands but expected "
//===----------------------------------------------------------------------===//
void LaunchFuncOp::build(Builder *builder, OperationState &result,
- ::mlir::FuncOp kernelFunc, Value *gridSizeX,
+ GPUFuncOp kernelFunc, Value *gridSizeX,
Value *gridSizeY, Value *gridSizeZ, Value *blockSizeX,
Value *blockSizeY, Value *blockSizeZ,
ValueRange kernelOperands) {
}
void LaunchFuncOp::build(Builder *builder, OperationState &result,
- ::mlir::FuncOp kernelFunc, KernelDim3 gridSize,
+ GPUFuncOp kernelFunc, KernelDim3 gridSize,
KernelDim3 blockSize, ValueRange kernelOperands) {
build(builder, result, kernelFunc, gridSize.x, gridSize.y, gridSize.z,
blockSize.x, blockSize.y, blockSize.z, kernelOperands);
p.printRegion(op.getBody(), /*printEntryBlockArgs=*/false);
}
+/// Change the type of this function in place (see the dangers documented on
+/// the op declaration: the caller must also fix the entry block arguments).
+/// Changing the number of results is not implemented.
+void GPUFuncOp::setType(FunctionType newType) {
+  auto oldType = getType();
+  assert(newType.getNumResults() == oldType.getNumResults() &&
+         "unimplemented: changes to the number of results");
+
+  SmallVector<char, 16> nameBuf;
+  // If the new type has fewer inputs, drop the per-argument attributes of the
+  // now-removed trailing arguments; remaining ones keep their attributes.
+  for (int i = newType.getNumInputs(), e = oldType.getNumInputs(); i < e; i++)
+    removeAttr(getArgAttrName(i, nameBuf));
+
+  setAttr(getTypeAttrName(), TypeAttr::get(newType));
+}
+
/// Hook for FunctionLike verifier.
LogicalResult GPUFuncOp::verifyType() {
Type type = getTypeAttr().getValue();
}
}
-// Add operations generating block/thread ids and gird/block dimensions at the
-// beginning of `kernelFunc` and replace uses of the respective function args.
-static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
- OpBuilder OpBuilder(kernelFunc.getBody());
+// Add operations generating block/thread ids and grid/block dimensions at the
+// beginning of the `body` region and replace uses of the respective function
+// arguments.
+static void injectGpuIndexOperations(Location loc, Region &body) {
+ OpBuilder builder(loc->getContext());
+ Block &firstBlock = body.front();
+ builder.setInsertionPointToStart(&firstBlock);
SmallVector<Value *, 12> indexOps;
- createForAllDimensions<gpu::BlockIdOp>(OpBuilder, loc, indexOps);
- createForAllDimensions<gpu::ThreadIdOp>(OpBuilder, loc, indexOps);
- createForAllDimensions<gpu::GridDimOp>(OpBuilder, loc, indexOps);
- createForAllDimensions<gpu::BlockDimOp>(OpBuilder, loc, indexOps);
+ createForAllDimensions<gpu::BlockIdOp>(builder, loc, indexOps);
+ createForAllDimensions<gpu::ThreadIdOp>(builder, loc, indexOps);
+ createForAllDimensions<gpu::GridDimOp>(builder, loc, indexOps);
+ createForAllDimensions<gpu::BlockDimOp>(builder, loc, indexOps);
// Replace the leading 12 function args with the respective thread/block index
// operations. Iterate backwards since args are erased and indices change.
for (int i = 11; i >= 0; --i) {
- auto &firstBlock = kernelFunc.front();
firstBlock.getArgument(i)->replaceAllUsesWith(indexOps[i]);
firstBlock.eraseArgument(i);
}
// Move arguments of the given kernel function into the function if this reduces
// the number of kernel arguments.
-static gpu::LaunchFuncOp inlineBeneficiaryOps(FuncOp kernelFunc,
+static gpu::LaunchFuncOp inlineBeneficiaryOps(gpu::GPUFuncOp kernelFunc,
gpu::LaunchFuncOp launch) {
OpBuilder kernelBuilder(kernelFunc.getBody());
auto &firstBlock = kernelFunc.getBody().front();
// Outline the `gpu.launch` operation body into a kernel function. Replace
// `gpu.return` operations by `std.return` in the generated function.
-static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
+static gpu::GPUFuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
Location loc = launchOp.getLoc();
+ // Create a builder with no insertion point, insertion will happen separately
+ // due to symbol table manipulation.
+ OpBuilder builder(launchOp.getContext());
+
SmallVector<Type, 4> kernelOperandTypes(launchOp.getKernelOperandTypes());
FunctionType type =
FunctionType::get(kernelOperandTypes, {}, launchOp.getContext());
std::string kernelFuncName =
Twine(launchOp.getParentOfType<FuncOp>().getName(), "_kernel").str();
- FuncOp outlinedFunc = FuncOp::create(loc, kernelFuncName, type);
- outlinedFunc.getBody().takeBody(launchOp.body());
- Builder builder(launchOp.getContext());
+ auto outlinedFunc = builder.create<gpu::GPUFuncOp>(loc, kernelFuncName, type);
outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
builder.getUnitAttr());
- injectGpuIndexOperations(loc, outlinedFunc);
- outlinedFunc.walk([](gpu::ReturnOp op) {
- OpBuilder replacer(op);
- replacer.create<ReturnOp>(op.getLoc());
- op.erase();
- });
+ outlinedFunc.body().takeBody(launchOp.body());
+ injectGpuIndexOperations(loc, outlinedFunc.body());
return outlinedFunc;
}
// Replace `gpu.launch` operations with an `gpu.launch_func` operation launching
// `kernelFunc`. The kernel func contains the body of the `gpu.launch` with
// constant region arguments inlined.
-static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) {
+static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp,
+ gpu::GPUFuncOp kernelFunc) {
OpBuilder builder(launchOp);
auto launchFuncOp = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
// Insert just after the function.
Block::iterator insertPt(func.getOperation()->getNextNode());
func.walk([&](gpu::LaunchOp op) {
- FuncOp outlinedFunc = outlineKernelFunc(op);
+ gpu::GPUFuncOp outlinedFunc = outlineKernelFunc(op);
// Create nested module and insert outlinedFunc. The module will
// originally get the same name as the function, but may be renamed on
private:
// Returns a module containing kernelFunc and all callees (recursive).
- ModuleOp createKernelModule(FuncOp kernelFunc,
+ ModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
const SymbolTable &parentSymbolTable) {
auto context = getModule().getContext();
Builder builder(context);
// Entry Function signature Conversion
//===----------------------------------------------------------------------===//
-FuncOp mlir::spirv::lowerAsEntryFunction(
- FuncOp funcOp, SPIRVTypeConverter &typeConverter,
- ConversionPatternRewriter &rewriter,
- spirv::EntryPointABIAttr entryPointInfo,
- ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
- auto fnType = funcOp.getType();
- if (fnType.getNumResults()) {
- funcOp.emitError("SPIR-V lowering only supports entry functions"
- "with no return values right now");
- return nullptr;
- }
- if (fnType.getNumInputs() != argABIInfo.size()) {
- funcOp.emitError(
- "lowering as entry functions requires ABI info for all arguments");
- return nullptr;
- }
- // For entry functions need to make the signature void(void). Compute the
- // replacement value for all arguments and replace all uses.
- TypeConverter::SignatureConversion signatureConverter(fnType.getNumInputs());
- {
- for (auto argType : enumerate(funcOp.getType().getInputs())) {
- auto convertedType = typeConverter.convertType(argType.value());
- signatureConverter.addInputs(argType.index(), convertedType);
- }
- }
- auto newFuncOp = rewriter.cloneWithoutRegions(funcOp);
- rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
- newFuncOp.end());
- newFuncOp.setType(rewriter.getFunctionType(
- signatureConverter.getConvertedTypes(), llvm::None));
- rewriter.applySignatureConversion(&newFuncOp.getBody(), signatureConverter);
- rewriter.eraseOp(funcOp);
-
- spirv::setABIAttrs(newFuncOp, entryPointInfo, argABIInfo);
- return newFuncOp;
-}
-
LogicalResult
mlir::spirv::setABIAttrs(FuncOp funcOp, spirv::EntryPointABIAttr entryPointInfo,
ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
// CHECK: llvm.mlir.global internal constant @[[global:.*]]("CUBIN")
module @kernel_module attributes {gpu.kernel_module, nvvm.cubin = "CUBIN"} {
- func @kernel(!llvm.float, !llvm<"float*">)
- attributes { gpu.kernel }
+ gpu.func @kernel(%arg0: !llvm.float, %arg1: !llvm<"float*">) attributes {gpu.kernel} {
+ gpu.return
+ }
}
llvm.func @foo() {
"test.finish" () : () -> ()
}) : () -> ()
}
+
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
module @kernels attributes {gpu.kernel_module} {
- func @builtin_workgroup_id_x()
+ gpu.func @builtin_workgroup_id_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.block_id"() {dimension = "x"} : () -> index
- return
+ gpu.return
}
}
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
module @kernels attributes {gpu.kernel_module} {
- func @builtin_workgroup_id_y()
+ gpu.func @builtin_workgroup_id_y()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
%0 = "gpu.block_id"() {dimension = "y"} : () -> index
- return
+ gpu.return
}
}
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
module @kernels attributes {gpu.kernel_module} {
- func @builtin_workgroup_id_z()
+ gpu.func @builtin_workgroup_id_z()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
%0 = "gpu.block_id"() {dimension = "z"} : () -> index
- return
+ gpu.return
}
}
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
module @kernels attributes {gpu.kernel_module} {
- func @builtin_workgroup_size_x()
+ gpu.func @builtin_workgroup_size_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPSIZE]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.block_dim"() {dimension = "x"} : () -> index
- return
+ gpu.return
}
}
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId")
module @kernels attributes {gpu.kernel_module} {
- func @builtin_local_id_x()
+ gpu.func @builtin_local_id_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.thread_id"() {dimension = "x"} : () -> index
- return
+ gpu.return
}
}
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups")
module @kernels attributes {gpu.kernel_module} {
- func @builtin_num_workgroups_x()
+ gpu.func @builtin_num_workgroups_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.grid_dim"() {dimension = "x"} : () -> index
- return
+ gpu.return
}
}
}
// CHECK-SAME: [[ARG4:%.*]]: i32 {spirv.interface_var_abi = {binding = 4 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
// CHECK-SAME: [[ARG5:%.*]]: i32 {spirv.interface_var_abi = {binding = 5 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
// CHECK-SAME: [[ARG6:%.*]]: i32 {spirv.interface_var_abi = {binding = 6 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
- func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index)
+ gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index)
attributes {gpu.kernel} {
// CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]]
// CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]]
// CHECK: [[PTR3:%.*]] = spv.AccessChain [[ARG2]]{{\[}}{{%.*}}, {{%.*}}{{\]}}
// CHECK-NEXT: spv.Store "StorageBuffer" [[PTR3]], [[VAL3]]
store %16, %arg2[%12, %13] : memref<12x4xf32>
- return
+ gpu.return
}
}
}
}
module @kernels attributes {gpu.kernel_module} {
- func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>)
+ gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>)
attributes {gpu.kernel} {
// CHECK: [[LB:%.*]] = spv.constant 4 : i32
%lb = constant 4 : index
%1 = load %arg2[%arg4] : memref<10xf32>
store %1, %arg3[%arg4] : memref<10xf32>
}
- return
+ gpu.return
}
}
-}
\ No newline at end of file
+}
// CHECK-SAME: {{%.*}}: f32 {spirv.interface_var_abi = {binding = 0 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
// CHECK-SAME: {{%.*}}: !spv.ptr<!spv.struct<!spv.array<12 x f32 [4]> [0]>, StorageBuffer> {spirv.interface_var_abi = {binding = 1 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
// CHECK-SAME: spirv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}
- func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32>)
- attributes { gpu.kernel } {
+ gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} {
// CHECK: spv.Return
- return
+ gpu.return
}
// CHECK: attributes {capabilities = ["Shader"], extensions = ["SPV_KHR_storage_buffer_storage_class"]}
}
module attributes {gpu.container_module} {
module @kernels attributes {gpu.kernel_module} {
- func @kernel_1(%arg1 : !llvm<"float*">) {
- return
+ gpu.func @kernel_1(%arg1 : !llvm<"float*">) kernel {
+ gpu.return
}
}
func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm<"float*">) {
- // expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
+ // xpected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
{kernel = "kernel_1", kernel_module = @kernels}
: (index, index, index, index, index, index, !llvm<"float*">) -> ()
module attributes {gpu.container_module} {
module @kernels attributes {gpu.kernel_module} {
- func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
- return
+ gpu.func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
+ gpu.return
}
}
// -----
module @kernels attributes {gpu.kernel_module} {
- func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
- return
+ gpu.func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
+ gpu.return
}
}
}
module @kernels attributes {gpu.kernel_module} {
- func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>)
- attributes { gpu.kernel } {
+ gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>) attributes {gpu.kernel} {
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
"some_op"(%bIdX, %tIdX) : (index, index) -> ()
%42 = load %arg1[%bIdX] : memref<?xf32, 1>
- return
+ gpu.return
}
- func @kernel_2(f32, memref<?xf32, 1>)
- attributes { gpu.kernel }
+ gpu.func @kernel_2(%arg0: f32, %arg1: memref<?xf32, 1>) attributes {gpu.kernel} {
+ gpu.return
+ }
}
func @foo() {
// CHECK-LABEL: module @launch_kernel
-// CHECK-NEXT: func @launch_kernel
+// CHECK-NEXT: gpu.func @launch_kernel
// CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref<?xf32, 1>)
-// CHECK: attributes {gpu.kernel}
// CHECK-NEXT: %[[BID:.*]] = "gpu.block_id"() {dimension = "x"} : () -> index
// CHECK-NEXT: = "gpu.block_id"() {dimension = "y"} : () -> index
// CHECK-NEXT: = "gpu.block_id"() {dimension = "z"} : () -> index
}
// CHECK: module @function_call_kernel attributes {gpu.kernel_module} {
-// CHECK: func @function_call_kernel()
+// CHECK: gpu.func @function_call_kernel()
// CHECK: call @device_function() : () -> ()
// CHECK: call @device_function() : () -> ()
// CHECK: llvm.mlir.addressof @global : !llvm<"i64*">