From: Ivan Butygin
Date: Thu, 22 Dec 2022 19:50:52 +0000 (+0100)
Subject: [mlir][gpu][spirv] Lower gpu reduction ops to spirv
X-Git-Tag: upstream/17.0.6~22434
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2e4aa3bd83faef5e89275cba97d99b1a77c4d25c;p=platform%2Fupstream%2Fllvm.git

[mlir][gpu][spirv] Lower gpu reduction ops to spirv

Supports only "add" and "mul" ops for now. More ops will be added later.

Differential Revision: https://reviews.llvm.org/D140576
---
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index 5c9de5e..86a71ac 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -427,6 +427,118 @@ LogicalResult GPUShuffleConversion::matchAndRewrite(
 }
 
 //===----------------------------------------------------------------------===//
+// Group ops
+//===----------------------------------------------------------------------===//
+
+template <typename UniformOp, typename NonUniformOp>
+static Value createGroupReduceOpImpl(OpBuilder &builder, Location loc,
+                                     Value arg, bool isGroup, bool isUniform) {
+  Type type = arg.getType();
+  auto scope = mlir::spirv::ScopeAttr::get(builder.getContext(),
+                                           isGroup ? spirv::Scope::Workgroup
+                                                   : spirv::Scope::Subgroup);
+  auto groupOp = spirv::GroupOperationAttr::get(builder.getContext(),
+                                                spirv::GroupOperation::Reduce);
+  if (isUniform) {
+    return builder.create<UniformOp>(loc, type, scope, groupOp, arg)
+        .getResult();
+  } else {
+    return builder
+        .create<NonUniformOp>(loc, type, scope, groupOp, arg, Value{})
+        .getResult();
+  }
+}
+
+static llvm::Optional<Value> createGroupReduceOp(OpBuilder &builder,
+                                                 Location loc, Value arg,
+                                                 gpu::AllReduceOperation opType,
+                                                 bool isGroup, bool isUniform) {
+  using FuncT = Value (*)(OpBuilder &, Location, Value, bool, bool);
+  struct OpHandler {
+    gpu::AllReduceOperation type;
+    FuncT intFunc;
+    FuncT floatFunc;
+  };
+
+  Type type = arg.getType();
+  using MembptrT = FuncT OpHandler::*;
+  MembptrT handlerPtr;
+  if (type.isa<FloatType>()) {
+    handlerPtr = &OpHandler::floatFunc;
+  } else if (type.isa<IntegerType>()) {
+    handlerPtr = &OpHandler::intFunc;
+  } else {
+    return std::nullopt;
+  }
+
+  using ReduceType = gpu::AllReduceOperation;
+  namespace spv = spirv;
+  const OpHandler handlers[] = {
+      {ReduceType::ADD,
+       &createGroupReduceOpImpl<spv::GroupIAddOp, spv::GroupNonUniformIAddOp>,
+       &createGroupReduceOpImpl<spv::GroupFAddOp, spv::GroupNonUniformFAddOp>},
+      {ReduceType::MUL,
+       &createGroupReduceOpImpl<spv::GroupIMulKHROp,
+                                spv::GroupNonUniformIMulOp>,
+       &createGroupReduceOpImpl<spv::GroupFMulKHROp,
+                                spv::GroupNonUniformFMulOp>},
+  };
+
+  for (auto &handler : handlers)
+    if (handler.type == opType)
+      return (handler.*handlerPtr)(builder, loc, arg, isGroup, isUniform);
+
+  return std::nullopt;
+}
+
+/// Pattern to convert a gpu.all_reduce op into a SPIR-V group op.
+class GPUAllReduceConversion final
+    : public OpConversionPattern<gpu::AllReduceOp> {
+public:
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(gpu::AllReduceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto opType = op.getOp();
+
+    // gpu.all_reduce can have either a reduction op attribute or a reduction
+    // region. Only the attribute version is supported.
+    if (!opType)
+      return failure();
+
+    auto result =
+        createGroupReduceOp(rewriter, op.getLoc(), adaptor.getValue(), *opType,
+                            /*isGroup*/ true, op.getUniform());
+    if (!result)
+      return failure();
+
+    rewriter.replaceOp(op, *result);
+    return success();
+  }
+};
+
+/// Pattern to convert a gpu.subgroup_reduce op into a SPIR-V group op.
+class GPUSubgroupReduceConversion final
+    : public OpConversionPattern<gpu::SubgroupReduceOp> {
+public:
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(gpu::SubgroupReduceOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto opType = op.getOp();
+    auto result =
+        createGroupReduceOp(rewriter, op.getLoc(), adaptor.getValue(), opType,
+                            /*isGroup*/ false, op.getUniform());
+    if (!result)
+      return failure();
+
+    rewriter.replaceOp(op, *result);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
 // GPU To SPIRV Patterns.
 //===----------------------------------------------------------------------===//
 
@@ -448,5 +560,6 @@ void mlir::populateGPUToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
                                                spirv::BuiltIn::NumSubgroups>,
                SingleDimLaunchConfigConversion<gpu::SubgroupSizeOp,
                                                spirv::BuiltIn::SubgroupSize>,
-               WorkGroupSizeConversion>(typeConverter, patterns.getContext());
+               WorkGroupSizeConversion, GPUAllReduceConversion,
+               GPUSubgroupReduceConversion>(typeConverter, patterns.getContext());
 }
diff --git a/mlir/test/Conversion/GPUToSPIRV/reductions.mlir b/mlir/test/Conversion/GPUToSPIRV/reductions.mlir
new file mode 100644
index 0000000..2457045
--- /dev/null
+++ b/mlir/test/Conversion/GPUToSPIRV/reductions.mlir
@@ -0,0 +1,319 @@
+// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv -verify-diagnostics %s -o - | FileCheck %s
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupFAdd <Workgroup> <Reduce> %[[ARG]] : f32
+    %reduced = gpu.all_reduce add %arg uniform {} : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformFAdd "Workgroup" "Reduce" %[[ARG]] : f32
+    %reduced = gpu.all_reduce add %arg {} : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupIAdd <Workgroup> <Reduce> %[[ARG]] : i32
+    %reduced = gpu.all_reduce add %arg uniform {} : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformIAdd "Workgroup" "Reduce" %[[ARG]] : i32
+    %reduced = gpu.all_reduce add %arg {} : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupFAdd <Subgroup> <Reduce> %[[ARG]] : f32
+    %reduced = gpu.subgroup_reduce add %arg uniform : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformFAdd "Subgroup" "Reduce" %[[ARG]] : f32
+    %reduced = gpu.subgroup_reduce add %arg : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupIAdd <Subgroup> <Reduce> %[[ARG]] : i32
+    %reduced = gpu.subgroup_reduce add %arg uniform : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformIAdd "Subgroup" "Reduce" %[[ARG]] : i32
+    %reduced = gpu.subgroup_reduce add %arg : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.KHR.GroupFMul <Workgroup> <Reduce> %[[ARG]] : f32
+    %reduced = gpu.all_reduce mul %arg uniform {} : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformFMul "Workgroup" "Reduce" %[[ARG]] : f32
+    %reduced = gpu.all_reduce mul %arg {} : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.KHR.GroupIMul <Workgroup> <Reduce> %[[ARG]] : i32
+    %reduced = gpu.all_reduce mul %arg uniform {} : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformIMul "Workgroup" "Reduce" %[[ARG]] : i32
+    %reduced = gpu.all_reduce mul %arg {} : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.KHR.GroupFMul <Subgroup> <Reduce> %[[ARG]] : f32
+    %reduced = gpu.subgroup_reduce mul %arg uniform : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformFMul "Subgroup" "Reduce" %[[ARG]] : f32
+    %reduced = gpu.subgroup_reduce mul %arg : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.KHR.GroupIMul <Subgroup> <Reduce> %[[ARG]] : i32
+    %reduced = gpu.subgroup_reduce mul %arg uniform : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformIMul "Subgroup" "Reduce" %[[ARG]] : i32
+    %reduced = gpu.subgroup_reduce mul %arg : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}