- `1`
- `0`
- :good:`100%`
- * - mlir/include/mlir/Conversion/VectorToROCDL
- - `1`
- - `1`
- - `0`
- - :good:`100%`
* - mlir/include/mlir/Conversion/VectorToSCF
- `1`
- `1`
- `2`
- `0`
- :good:`100%`
- * - mlir/lib/Conversion/VectorToROCDL
- - `1`
- - `1`
- - `0`
- - :good:`100%`
* - mlir/lib/Conversion/VectorToSCF
- `1`
- `1`
mlir/include/mlir/Conversion/TosaToStandard/TosaToStandard.h
mlir/include/mlir/Conversion/VectorToGPU/VectorToGPU.h
mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h
-mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h
mlir/include/mlir/Conversion/VectorToSCF/VectorToSCF.h
mlir/include/mlir/Conversion/VectorToSPIRV/VectorToSPIRV.h
mlir/include/mlir/Conversion/VectorToSPIRV/VectorToSPIRVPass.h
mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp
mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
-mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp
mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRVPass.cpp
mlir/lib/Dialect/Traits.cpp
#include "mlir/Conversion/TosaToTensor/TosaToTensor.h"
#include "mlir/Conversion/VectorToGPU/VectorToGPU.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
-#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
#include "mlir/Conversion/VectorToSPIRV/VectorToSPIRVPass.h"
"dialect";
let constructor = "mlir::createConvertVectorToGPUPass()";
let dependentDialects = [
- "memref::MemRefDialect", "gpu::GPUDialect", "AffineDialect",
+ "memref::MemRefDialect", "gpu::GPUDialect", "AffineDialect",
"vector::VectorDialect", "nvgpu::NVGPUDialect"
];
let options = [
- Option<"useNvGpu", "use-nvgpu", "bool", /*default=*/"false",
+ Option<"useNvGpu", "use-nvgpu", "bool", /*default=*/"false",
"convert to NvGPU ops instead of GPU dialect ops">
];
}
}
//===----------------------------------------------------------------------===//
-// VectorToROCDL
-//===----------------------------------------------------------------------===//
-
-def ConvertVectorToROCDL : Pass<"convert-vector-to-rocdl", "ModuleOp"> {
- let summary = "Lower the operations from the vector dialect into the ROCDL "
- "dialect";
- let constructor = "mlir::createConvertVectorToROCDLPass()";
- let dependentDialects = ["ROCDL::ROCDLDialect"];
-}
-
-//===----------------------------------------------------------------------===//
// VectorToSPIRV
//===----------------------------------------------------------------------===//
+++ /dev/null
-//===- VectorToROCDL.h - Convert Vector to ROCDL dialect ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef MLIR_CONVERSION_VECTORTOROCDL_VECTORTOROCDL_H_
-#define MLIR_CONVERSION_VECTORTOROCDL_VECTORTOROCDL_H_
-
-#include <memory>
-
-namespace mlir {
-class LLVMTypeConverter;
-class ModuleOp;
-template <typename OpT>
-class OperationPass;
-class RewritePatternSet;
-
-/// Collect a set of patterns to convert from the GPU dialect to ROCDL.
-void populateVectorToROCDLConversionPatterns(LLVMTypeConverter &converter,
- RewritePatternSet &patterns);
-
-/// Create a pass to convert vector operations to the ROCDL dialect.
-std::unique_ptr<OperationPass<ModuleOp>> createConvertVectorToROCDLPass();
-
-} // namespace mlir
-#endif // MLIR_CONVERSION_VECTORTOROCDL_VECTORTOROCDL_H_
add_subdirectory(TosaToLinalg)
add_subdirectory(TosaToSCF)
add_subdirectory(TosaToTensor)
-add_subdirectory(VectorToROCDL)
add_subdirectory(VectorToLLVM)
add_subdirectory(VectorToGPU)
add_subdirectory(VectorToSCF)
MLIRMemRefToLLVM
MLIRROCDLDialect
MLIRPass
- MLIRVectorToROCDL
)
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
-#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
*maybeChipset);
populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
- populateVectorToROCDLConversionPatterns(converter, llvmPatterns);
cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
populateMemRefToLLVMConversionPatterns(converter, llvmPatterns);
+++ /dev/null
-add_mlir_conversion_library(MLIRVectorToROCDL
- VectorToROCDL.cpp
-
- ADDITIONAL_HEADER_DIRS
- ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/VectorToROCDL
-
- DEPENDS
- MLIRConversionPassIncGen
- intrinsics_gen
-
- LINK_COMPONENTS
- Core
-
- LINK_LIBS PUBLIC
- MLIRFuncToLLVM
- MLIRROCDLDialect
- MLIRLLVMCommonConversion
- MLIRMemRefToLLVM
- MLIRVectorDialect
- MLIRTransforms
- )
+++ /dev/null
-//===- VectorToROCDL.cpp - Vector to ROCDL lowering passes ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass to generate ROCDLIR operations for higher-level
-// Vector operations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
-
-#include "../PassDetail.h"
-#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
-#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
-#include "mlir/Conversion/LLVMCommon/Pattern.h"
-#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
-#include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
-#include "mlir/Dialect/Vector/IR/VectorOps.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/DialectConversion.h"
-
-using namespace mlir;
-using namespace mlir::vector;
-
-static LogicalResult replaceTransferOpWithMubuf(
- ConversionPatternRewriter &rewriter, ValueRange operands,
- LLVMTypeConverter &typeConverter, Location loc, TransferReadOp xferOp,
- Type &vecTy, Value &dwordConfig, Value &vindex, Value &offsetSizeInBytes,
- Value &glc, Value &slc) {
- rewriter.replaceOpWithNewOp<ROCDL::MubufLoadOp>(
- xferOp, vecTy, dwordConfig, vindex, offsetSizeInBytes, glc, slc);
- return success();
-}
-
-static LogicalResult replaceTransferOpWithMubuf(
- ConversionPatternRewriter &rewriter, ValueRange operands,
- LLVMTypeConverter &typeConverter, Location loc, TransferWriteOp xferOp,
- Type &vecTy, Value &dwordConfig, Value &vindex, Value &offsetSizeInBytes,
- Value &glc, Value &slc) {
- auto adaptor = TransferWriteOpAdaptor(operands, xferOp->getAttrDictionary());
- rewriter.replaceOpWithNewOp<ROCDL::MubufStoreOp>(xferOp, adaptor.getVector(),
- dwordConfig, vindex,
- offsetSizeInBytes, glc, slc);
- return success();
-}
-
-namespace {
-/// Conversion pattern that converts a 1-D vector transfer read/write.
-/// Note that this conversion pass only converts vector x2 or x4 f32
-/// types. For unsupported cases, they will fall back to the vector to
-/// llvm conversion pattern.
-template <typename ConcreteOp>
-class VectorTransferConversion : public ConvertOpToLLVMPattern<ConcreteOp> {
-public:
- using ConvertOpToLLVMPattern<ConcreteOp>::ConvertOpToLLVMPattern;
-
- LogicalResult
- matchAndRewrite(ConcreteOp xferOp, typename ConcreteOp::Adaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- // TODO: support 0-d corner case.
- if (xferOp.getTransferRank() == 0)
- return failure();
-
- if (xferOp.getVectorType().getRank() > 1 ||
- llvm::size(xferOp.getIndices()) == 0)
- return failure();
-
- if (!xferOp.getPermutationMap().isMinorIdentity())
- return failure();
-
- // Have it handled in vector->llvm conversion pass.
- if (xferOp.isDimInBounds(0))
- return failure();
-
- auto toLLVMTy = [&](Type t) {
- return this->getTypeConverter()->convertType(t);
- };
- auto vecTy = toLLVMTy(xferOp.getVectorType());
- unsigned vecWidth = LLVM::getVectorNumElements(vecTy).getFixedValue();
- Location loc = xferOp->getLoc();
-
- // The backend result vector scalarization have trouble scalarize
- // <1 x ty> result, exclude the x1 width from the lowering.
- if (vecWidth != 2 && vecWidth != 4)
- return failure();
-
- // Obtain dataPtr and elementType from the memref.
- auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();
- if (!memRefType)
- return failure();
- // MUBUF instruction operate only on addresspace 0(unified) or 1(global)
- // In case of 3(LDS): fall back to vector->llvm pass
- // In case of 5(VGPR): wrong
- if ((memRefType.getMemorySpaceAsInt() != 0) &&
- (memRefType.getMemorySpaceAsInt() != 1))
- return failure();
-
- // Note that the dataPtr starts at the offset address specified by
- // indices, so no need to calculate offset size in bytes again in
- // the MUBUF instruction.
- Value dataPtr = this->getStridedElementPtr(
- loc, memRefType, adaptor.getSource(), adaptor.getIndices(), rewriter);
-
- // 1. Create and fill a <4 x i32> dwordConfig with:
- // 1st two elements holding the address of dataPtr.
- // 3rd element: -1.
- // 4th element: 0x27000.
- SmallVector<int32_t, 4> constConfigAttr{0, 0, -1, 0x27000};
- Type i32Ty = rewriter.getIntegerType(32);
- VectorType i32Vecx4 = VectorType::get(4, i32Ty);
- Value constConfig = rewriter.create<LLVM::ConstantOp>(
- loc, toLLVMTy(i32Vecx4),
- DenseElementsAttr::get(i32Vecx4, ArrayRef<int32_t>(constConfigAttr)));
-
- // Treat first two element of <4 x i32> as i64, and save the dataPtr
- // to it.
- Type i64Ty = rewriter.getIntegerType(64);
- Value i64x2Ty = rewriter.create<LLVM::BitcastOp>(
- loc, LLVM::getFixedVectorType(toLLVMTy(i64Ty), 2), constConfig);
- Value dataPtrAsI64 = rewriter.create<LLVM::PtrToIntOp>(
- loc, toLLVMTy(i64Ty).template cast<Type>(), dataPtr);
- Value zero = this->createIndexConstant(rewriter, loc, 0);
- Value dwordConfig = rewriter.create<LLVM::InsertElementOp>(
- loc, LLVM::getFixedVectorType(toLLVMTy(i64Ty), 2), i64x2Ty,
- dataPtrAsI64, zero);
- dwordConfig =
- rewriter.create<LLVM::BitcastOp>(loc, toLLVMTy(i32Vecx4), dwordConfig);
-
- // 2. Rewrite op as a buffer read or write.
- Value int1False = rewriter.create<LLVM::ConstantOp>(
- loc, toLLVMTy(rewriter.getIntegerType(1)),
- rewriter.getIntegerAttr(rewriter.getIntegerType(1), 0));
- Value int32Zero = rewriter.create<LLVM::ConstantOp>(
- loc, toLLVMTy(i32Ty),
- rewriter.getIntegerAttr(rewriter.getIntegerType(32), 0));
- return replaceTransferOpWithMubuf(
- rewriter, adaptor.getOperands(), *this->getTypeConverter(), loc, xferOp,
- vecTy, dwordConfig, int32Zero, int32Zero, int1False, int1False);
- }
-};
-} // namespace
-
-void mlir::populateVectorToROCDLConversionPatterns(
- LLVMTypeConverter &converter, RewritePatternSet &patterns) {
- patterns.add<VectorTransferConversion<TransferReadOp>,
- VectorTransferConversion<TransferWriteOp>>(converter);
-}
-
-namespace {
-struct LowerVectorToROCDLPass
- : public ConvertVectorToROCDLBase<LowerVectorToROCDLPass> {
- void runOnOperation() override;
-};
-} // namespace
-
-void LowerVectorToROCDLPass::runOnOperation() {
- LLVMTypeConverter converter(&getContext());
- RewritePatternSet patterns(&getContext());
-
- populateVectorToROCDLConversionPatterns(converter, patterns);
- populateMemRefToLLVMConversionPatterns(converter, patterns);
- populateFuncToLLVMConversionPatterns(converter, patterns);
-
- LLVMConversionTarget target(getContext());
- target.addLegalDialect<ROCDL::ROCDLDialect>();
-
- if (failed(
- applyPartialConversion(getOperation(), target, std::move(patterns))))
- signalPassFailure();
-}
-
-std::unique_ptr<OperationPass<ModuleOp>>
-mlir::createConvertVectorToROCDLPass() {
- return std::make_unique<LowerVectorToROCDLPass>();
-}
+++ /dev/null
-// RUN: mlir-opt %s -convert-vector-to-rocdl | FileCheck %s
-
-gpu.module @test_read{
-func.func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
- %f0 = arith.constant 0.0: f32
- %f = vector.transfer_read %A[%base], %f0
- {permutation_map = affine_map<(d0) -> (d0)>} :
- memref<?xf32>, vector<2xf32>
- return %f: vector<2xf32>
-}
-// CHECK-LABEL: @transfer_readx2
-// CHECK: rocdl.buffer.load {{.*}} vector<2xf32>
-
-func.func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
- %f0 = arith.constant 0.0: f32
- %f = vector.transfer_read %A[%base], %f0
- {permutation_map = affine_map<(d0) -> (d0)>} :
- memref<?xf32>, vector<4xf32>
- return %f: vector<4xf32>
-}
-// CHECK-LABEL: @transfer_readx4
-// CHECK: rocdl.buffer.load {{.*}} vector<4xf32>
-
-func.func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
- %f0 = arith.constant 0.0: f32
- %f = vector.transfer_read %A[%base], %f0
- {permutation_map = affine_map<(d0) -> (d0)>} :
- memref<?xf32>, vector<4xf32>
- return %f: vector<4xf32>
-}
-// CHECK-LABEL: @transfer_read_dwordConfig
-// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}
-// CHECK: [0, 0, -1, 159744]
-// CHECK: %[[i64:.*]] = llvm.ptrtoint %[[gep]]
-// CHECK: llvm.insertelement %[[i64]]
-}
-
-gpu.module @test_write{
-func.func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
- vector.transfer_write %B, %A[%base]
- {permutation_map = affine_map<(d0) -> (d0)>} :
- vector<2xf32>, memref<?xf32>
- return
-}
-// CHECK-LABEL: @transfer_writex2
-// CHECK: rocdl.buffer.store {{.*}} vector<2xf32>
-
-func.func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
- vector.transfer_write %B, %A[%base]
- {permutation_map = affine_map<(d0) -> (d0)>} :
- vector<4xf32>, memref<?xf32>
- return
-}
-// CHECK-LABEL: @transfer_writex4
-// CHECK: rocdl.buffer.store {{.*}} vector<4xf32>
-
-func.func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
- vector.transfer_write %B, %A[%base]
- {permutation_map = affine_map<(d0) -> (d0)>} :
- vector<2xf32>, memref<?xf32>
- return
-}
-// CHECK-LABEL: @transfer_write_dwordConfig
-// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}
-// CHECK: [0, 0, -1, 159744]
-// CHECK: %[[i64:.*]] = llvm.ptrtoint %[[gep]]
-// CHECK: llvm.insertelement %[[i64]]
-}
// RUN: mlir-opt %s \
// RUN: -convert-scf-to-cf \
// RUN: -gpu-kernel-outlining \
-// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip})' \
// RUN: -gpu-to-llvm \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \
// RUN: --entry-point-result=void \
// RUN: | FileCheck %s
+// TODO: swap the raw buffer ops for vector transfers if a --vector-to-amdgpu pass is ever added
func.func @vectransferx2(%arg0 : memref<?xf32>, %arg1 : memref<?xf32>) {
%cst = arith.constant 1 : index
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, %block_z = %cst) {
%f0 = arith.constant 0.0: f32
- %base = arith.constant 0 : index
- %f = vector.transfer_read %arg0[%base], %f0
- {permutation_map = affine_map<(d0) -> (d0)>} :
- memref<?xf32>, vector<2xf32>
+ %base = arith.constant 0 : i32
+ %f = amdgpu.raw_buffer_load {boundsCheck = true } %arg0[%base]
+ : memref<?xf32>, i32 -> vector<2xf32>
%c = arith.addf %f, %f : vector<2xf32>
- %base1 = arith.constant 1 : index
- vector.transfer_write %c, %arg1[%base1]
- {permutation_map = affine_map<(d0) -> (d0)>} :
- vector<2xf32>, memref<?xf32>
+ %base1 = arith.constant 1 : i32
+ amdgpu.raw_buffer_store { boundsCheck = false } %c -> %arg1[%base1]
+ : vector<2xf32> -> memref<?xf32>, i32
gpu.terminator
}
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst, %block_z = %cst) {
%f0 = arith.constant 0.0: f32
- %base = arith.constant 0 : index
- %f = vector.transfer_read %arg0[%base], %f0
- {permutation_map = affine_map<(d0) -> (d0)>} :
- memref<?xf32>, vector<4xf32>
+ %base = arith.constant 0 : i32
+ %f = amdgpu.raw_buffer_load { boundsCheck = false } %arg0[%base]
+ : memref<?xf32>, i32 -> vector<4xf32>
%c = arith.addf %f, %f : vector<4xf32>
- vector.transfer_write %c, %arg1[%base]
- {permutation_map = affine_map<(d0) -> (d0)>} :
- vector<4xf32>, memref<?xf32>
+ amdgpu.raw_buffer_store { boundsCheck = false } %c -> %arg1[%base]
+ : vector<4xf32> -> memref<?xf32>, i32
gpu.terminator
}
":TosaToTensor",
":VectorToGPU",
":VectorToLLVM",
- ":VectorToROCDL",
":VectorToSCF",
":VectorToSPIRV",
],
)
cc_library(
- name = "VectorToROCDL",
- srcs = [
- "lib/Conversion/VectorToROCDL/VectorToROCDL.cpp",
- ":ConversionPassDetail",
- ],
- hdrs = ["include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h"],
- includes = ["include"],
- deps = [
- ":ConversionPassIncGen",
- ":FuncDialect",
- ":FuncToLLVM",
- ":GPUDialect",
- ":IR",
- ":LLVMCommonConversion",
- ":LLVMDialect",
- ":MemRefToLLVM",
- ":Pass",
- ":ROCDLDialect",
- ":Transforms",
- ":VectorDialect",
- ],
-)
-
-cc_library(
name = "VectorToSPIRV",
srcs = glob([
"lib/Conversion/VectorToSPIRV/*.cpp",
":Transforms",
":VectorDialect",
":VectorToLLVM",
- ":VectorToROCDL",
":VectorToSCF",
"//llvm:Support",
],
":TransformsPassIncGen",
":VectorDialect",
":VectorToLLVM",
- ":VectorToROCDL",
":VectorToSCF",
":VectorToSPIRV",
":VectorTransforms",