// This file implements the conversion patterns from GPU ops to SPIR-V dialect.
//
//===----------------------------------------------------------------------===//
+#include "mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/SPIRV/SPIRVDialect.h"
// GPU To SPIRV Patterns.
//===----------------------------------------------------------------------===//
-namespace mlir {
-void populateGPUToSPIRVPatterns(MLIRContext *context,
- SPIRVTypeConverter &typeConverter,
- OwningRewritePatternList &patterns,
- ArrayRef<int64_t> workGroupSize) {
+void mlir::populateGPUToSPIRVPatterns(MLIRContext *context,
+ SPIRVTypeConverter &typeConverter,
+ OwningRewritePatternList &patterns,
+ ArrayRef<int64_t> workGroupSize) {
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
patterns.insert<
GPUReturnOpConversion, ForOpConversion, KernelModuleConversion,
spirv::BuiltIn::LocalInvocationId>>(context,
typeConverter);
}
-} // namespace mlir
PatternRewriter &rewriter) const override;
};
+} // namespace
+
/// Analyzes the `transfer` to find an access dimension along the fastest remote
/// MemRef dimension. If such a dimension with coalescing properties is found,
/// `pivs` and `vectorView` are swapped so that the invocation of
/// LoopNestBuilder captures it in the innermost loop.
template <typename TransferOpTy>
-void coalesceCopy(TransferOpTy transfer,
- SmallVectorImpl<edsc::ValueHandle *> *pivs,
- edsc::VectorView *vectorView) {
+static void coalesceCopy(TransferOpTy transfer,
+ SmallVectorImpl<edsc::ValueHandle *> *pivs,
+ edsc::VectorView *vectorView) {
// Rank of the remote memory access; coalescing behavior occurs on the
// innermost memory dimension.
auto remoteRank = transfer.getMemRefType().getRank();
/// Emits remote memory accesses that are clipped to the boundaries of the
/// MemRef.
template <typename TransferOpTy>
-SmallVector<edsc::ValueHandle, 8> clip(TransferOpTy transfer,
- edsc::MemRefView &view,
- ArrayRef<edsc::IndexHandle> ivs) {
+static SmallVector<edsc::ValueHandle, 8> clip(TransferOpTy transfer,
+ edsc::MemRefView &view,
+ ArrayRef<edsc::IndexHandle> ivs) {
using namespace mlir::edsc;
using namespace edsc::op;
using edsc::intrinsics::select;
rewriter.eraseOp(op);
return matchSuccess();
}
-} // namespace
void mlir::populateVectorToAffineLoopsConversionPatterns(
MLIRContext *context, OwningRewritePatternList &patterns) {
#include "mlir/Dialect/QuantOps/FakeQuantSupport.h"
#include "mlir/Dialect/QuantOps/QuantTypes.h"
-namespace mlir {
-namespace quant {
-namespace {
-bool getDefaultStorageParams(unsigned numBits, bool narrowRange, bool isSigned,
- MLIRContext *ctx, Type &storageType, int64_t &qmin,
- int64_t &qmax) {
+using namespace mlir;
+using namespace mlir::quant;
+
+static bool getDefaultStorageParams(unsigned numBits, bool narrowRange,
+ bool isSigned, MLIRContext *ctx,
+ Type &storageType, int64_t &qmin,
+ int64_t &qmax) {
// Hard-coded type mapping from TFLite.
if (numBits <= 8) {
storageType = IntegerType::get(8, ctx);
// range will be outside the shifted range and be clamped during quantization.
// TODO(fengliuai): we should nudge the scale as well, but that requires the
// fake quant op used in the training to use the nudged scale as well.
-void getNudgedScaleAndZeroPoint(int64_t qmin, int64_t qmax, double rmin,
- double rmax, double &scale,
- int64_t &nudgedZeroPoint) {
+static void getNudgedScaleAndZeroPoint(int64_t qmin, int64_t qmax, double rmin,
+ double rmax, double &scale,
+ int64_t &nudgedZeroPoint) {
// Determine the scale.
const double qminDouble = qmin;
const double qmaxDouble = qmax;
assert(nudgedZeroPoint <= qmax);
}
-} // end namespace
-
-UniformQuantizedType fakeQuantAttrsToType(Location loc, unsigned numBits,
- double rmin, double rmax,
- bool narrowRange, Type expressedType,
- bool isSigned) {
+UniformQuantizedType
+mlir::quant::fakeQuantAttrsToType(Location loc, unsigned numBits, double rmin,
+ double rmax, bool narrowRange,
+ Type expressedType, bool isSigned) {
MLIRContext *ctx = expressedType.getContext();
unsigned flags = isSigned ? QuantizationFlags::Signed : 0;
Type storageType;
loc);
}
-UniformQuantizedPerAxisType
-fakeQuantAttrsToType(Location loc, unsigned numBits, int32_t quantizedDimension,
- ArrayRef<double> rmins, ArrayRef<double> rmaxs,
- bool narrowRange, Type expressedType, bool isSigned) {
+UniformQuantizedPerAxisType mlir::quant::fakeQuantAttrsToType(
+ Location loc, unsigned numBits, int32_t quantizedDimension,
+ ArrayRef<double> rmins, ArrayRef<double> rmaxs, bool narrowRange,
+ Type expressedType, bool isSigned) {
size_t axis_size = rmins.size();
if (axis_size != rmaxs.size()) {
return (emitError(loc, "mismatched per-axis min and max size: ")
flags, storageType, expressedType, scales, zeroPoints, quantizedDimension,
qmin, qmax, loc);
}
-
-} // namespace quant
-} // namespace mlir
#include "mlir/IR/Attributes.h"
#include "mlir/IR/StandardTypes.h"
-namespace mlir {
-namespace quant {
+using namespace mlir;
+using namespace mlir::quant;
+
/// Converts a possible primitive, real expressed value attribute to a
/// corresponding storage attribute (typically FloatAttr -> IntegerAttr).
/// quantizedElementType is the QuantizedType that describes the expressed
/// Converts a real expressed Attribute to a corresponding Attribute containing
/// quantized storage values assuming the given uniform quantizedElementType and
/// converter.
-Attribute quantizeAttrUniform(Attribute realValue,
- UniformQuantizedType quantizedElementType,
- const UniformQuantizedValueConverter &converter,
- Type &outConvertedType) {
+Attribute mlir::quant::quantizeAttrUniform(
+ Attribute realValue, UniformQuantizedType quantizedElementType,
+ const UniformQuantizedValueConverter &converter, Type &outConvertedType) {
// Fork to handle different variants of constants supported.
if (realValue.isa<DenseFPElementsAttr>()) {
// Dense tensor or vector constant.
/// quantizedElementType.getStorageType().
/// Returns nullptr if the conversion is not supported.
/// On success, stores the converted type in outConvertedType.
-Attribute quantizeAttr(Attribute realValue, QuantizedType quantizedElementType,
- Type &outConvertedType) {
+Attribute mlir::quant::quantizeAttr(Attribute realValue,
+ QuantizedType quantizedElementType,
+ Type &outConvertedType) {
if (auto uniformQuantized =
quantizedElementType.dyn_cast<UniformQuantizedType>()) {
UniformQuantizedValueConverter converter(uniformQuantized);
return nullptr;
}
}
-
-} // namespace quant
-} // namespace mlir
return static_cast<ImplType *>(impl)->expr;
}
-namespace mlir {
-namespace ops_assertions {
-
-SDBMExpr operator+(SDBMExpr lhs, SDBMExpr rhs) {
+SDBMExpr mlir::ops_assertions::operator+(SDBMExpr lhs, SDBMExpr rhs) {
if (auto folded = foldSumDiff(lhs, rhs))
return folded;
assert(!(lhs.isa<SDBMNegExpr>() && rhs.isa<SDBMNegExpr>()) &&
return addConstant(lhs.cast<SDBMVaryingExpr>(), rhsConstant.getValue());
}
-SDBMExpr operator-(SDBMExpr lhs, SDBMExpr rhs) {
+SDBMExpr mlir::ops_assertions::operator-(SDBMExpr lhs, SDBMExpr rhs) {
// Fold x - x == 0.
if (lhs == rhs)
return SDBMConstantExpr::get(lhs.getDialect(), 0);
return buildDiffExpr(lhs.cast<SDBMDirectExpr>(), (-rhs).cast<SDBMNegExpr>());
}
-SDBMExpr stripe(SDBMExpr expr, SDBMExpr factor) {
+SDBMExpr mlir::ops_assertions::stripe(SDBMExpr expr, SDBMExpr factor) {
auto constantFactor = factor.cast<SDBMConstantExpr>();
assert(constantFactor.getValue() > 0 && "non-positive stripe");
return SDBMStripeExpr::get(expr.cast<SDBMDirectExpr>(), constantFactor);
}
-
-} // namespace ops_assertions
-} // namespace mlir
private:
void runOnModule() override;
};
+} // namespace
void DecorateSPIRVCompositeTypeLayoutPass::runOnModule() {
auto module = getModule();
}
}
}
-} // namespace
std::unique_ptr<OpPassBase<ModuleOp>>
mlir::spirv::createDecorateSPIRVCompositeTypeLayoutPass() {
using llvm::orc::ThreadSafeModule;
using llvm::orc::TMOwningSimpleCompiler;
-// Wrap a string into an llvm::StringError.
-static inline Error make_string_error(const Twine &message) {
+/// Wrap a string into an llvm::StringError.
+static Error make_string_error(const Twine &message) {
return llvm::make_error<StringError>(message.str(),
llvm::inconvertibleErrorCode());
}
-namespace mlir {
-
void SimpleObjectCache::notifyObjectCompiled(const Module *M,
MemoryBufferRef ObjBuffer) {
cachedObjects[M->getModuleIdentifier()] = MemoryBuffer::getMemBufferCopy(
return Error::success();
}
-} // end namespace mlir
return false;
}
-namespace mlir {
-namespace quantizer {
-
-raw_ostream &operator<<(raw_ostream &os, const TensorAxisStatistics &stats) {
+raw_ostream &mlir::quantizer::operator<<(raw_ostream &os,
+ const TensorAxisStatistics &stats) {
os << "STATS[sampleSize=" << stats.sampleSize << ", min=" << stats.minValue
<< ", maxValue=" << stats.maxValue << ", mean=" << stats.mean
<< ", variance=" << stats.variance << "]";
return os;
}
-
-} // end namespace quantizer
-} // end namespace mlir
return (xq - zp) * delta;
}
-namespace mlir {
-namespace quantizer {
-
-raw_ostream &operator<<(raw_ostream &os, const UniformStorageParams &p) {
+raw_ostream &mlir::quantizer::operator<<(raw_ostream &os,
+ const UniformStorageParams &p) {
os << "UniformStorageParams{" << p.numLevels << ", " << p.minValue << "}";
return os;
}
-raw_ostream &operator<<(raw_ostream &os,
- const UniformParamsFromMinMaxSolver &s) {
+raw_ostream &
+mlir::quantizer::operator<<(raw_ostream &os,
+ const UniformParamsFromMinMaxSolver &s) {
os << "UniformParamsFromMinMaxSolver(" << s.getStepCount() << "){";
os << "(" << s.getBoundingMin() << ":" << s.getBoundingMax() << ") -> ";
if (!s.isSatisfied()) {
return os;
}
-
-} // end namespace quantizer
-} // end namespace mlir
using namespace mlir;
-namespace {
static llvm::Value *createIntrinsicCall(llvm::IRBuilder<> &builder,
llvm::Intrinsic::ID intrinsic,
ArrayRef<llvm::Value *> args = {}) {
: llvm::Intrinsic::nvvm_shfl_sync_bfly_i32;
}
+namespace {
class ModuleTranslation : public LLVM::ModuleTranslation {
public:
using namespace mlir;
-namespace {
// Create a call to an LLVM intrinsic.
static llvm::Value *createIntrinsicCall(llvm::IRBuilder<> &builder,
llvm::Intrinsic::ID intrinsic,
return builder.CreateCall(fn, ArrayRef<llvm::Value *>(fn_op0));
}
+namespace {
class ModuleTranslation : public LLVM::ModuleTranslation {
public:
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"
-namespace mlir {
-namespace LLVM {
+using namespace mlir;
+using namespace mlir::LLVM;
-// Create an LLVM IR constant of `llvmType` from the MLIR attribute `attr`.
-// This currently supports integer, floating point, splat and dense element
-// attributes and combinations thereof. In case of error, report it to `loc`
-// and return nullptr.
+/// Create an LLVM IR constant of `llvmType` from the MLIR attribute `attr`.
+/// This currently supports integer, floating point, splat and dense element
+/// attributes and combinations thereof. In case of error, report it to `loc`
+/// and return nullptr.
llvm::Constant *ModuleTranslation::getLLVMConstant(llvm::Type *llvmType,
Attribute attr,
Location loc) {
return nullptr;
}
-// Convert MLIR integer comparison predicate to LLVM IR comparison predicate.
+/// Convert MLIR integer comparison predicate to LLVM IR comparison predicate.
static llvm::CmpInst::Predicate getLLVMCmpPredicate(ICmpPredicate p) {
switch (p) {
case LLVM::ICmpPredicate::eq:
llvm_unreachable("incorrect comparison predicate");
}
-// Given a single MLIR operation, create the corresponding LLVM IR operation
-// using the `builder`. LLVM IR Builder does not have a generic interface so
-// this has to be a long chain of `if`s calling different functions with a
-// different number of arguments.
+/// Given a single MLIR operation, create the corresponding LLVM IR operation
+/// using the `builder`. LLVM IR Builder does not have a generic interface so
+/// this has to be a long chain of `if`s calling different functions with a
+/// different number of arguments.
LogicalResult ModuleTranslation::convertOperation(Operation &opInst,
llvm::IRBuilder<> &builder) {
auto extractPosition = [](ArrayAttr attr) {
<< opInst.getName();
}
-// Convert block to LLVM IR. Unless `ignoreArguments` is set, emit PHI nodes
-// to define values corresponding to the MLIR block arguments. These nodes
-// are not connected to the source basic blocks, which may not exist yet.
+/// Convert block to LLVM IR. Unless `ignoreArguments` is set, emit PHI nodes
+/// to define values corresponding to the MLIR block arguments. These nodes
+/// are not connected to the source basic blocks, which may not exist yet.
LogicalResult ModuleTranslation::convertBlock(Block &bb, bool ignoreArguments) {
llvm::IRBuilder<> builder(blockMapping[&bb]);
return success();
}
-// Convert the LLVM dialect linkage type to LLVM IR linkage type.
+/// Convert the LLVM dialect linkage type to LLVM IR linkage type.
llvm::GlobalVariable::LinkageTypes convertLinkageType(LLVM::Linkage linkage) {
switch (linkage) {
case LLVM::Linkage::Private:
llvm_unreachable("unknown linkage type");
}
-// Create named global variables that correspond to llvm.mlir.global
-// definitions.
+/// Create named global variables that correspond to llvm.mlir.global
+/// definitions.
void ModuleTranslation::convertGlobals() {
for (auto op : getModuleBody(mlirModule).getOps<LLVM::GlobalOp>()) {
llvm::Type *type = op.getType().getUnderlyingType();
}
}
-// Get the SSA value passed to the current block from the terminator operation
-// of its predecessor.
+/// Get the SSA value passed to the current block from the terminator operation
+/// of its predecessor.
static Value *getPHISourceValue(Block *current, Block *pred,
unsigned numArguments, unsigned index) {
auto &terminator = *pred->getTerminator();
}
}
-// Sort function blocks topologically.
+/// Sort function blocks topologically.
static llvm::SetVector<Block *> topologicalSort(LLVMFuncOp f) {
// For each block that has not been visited yet (i.e. that has no
// predecessors), add it to the list and traverse its successors in DFS
return llvmModule;
}
-
-} // namespace LLVM
-} // namespace mlir
maximalFusion);
}
+// TODO(b/117228571) Replace when this is modeled through side-effects/op traits
+static bool isMemRefDereferencingOp(Operation &op) {
+ if (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op) ||
+ isa<AffineDmaStartOp>(op) || isa<AffineDmaWaitOp>(op))
+ return true;
+ return false;
+}
+
namespace {
// LoopNestStateCollector walks loop nests and collects load and store
}
};
-// TODO(b/117228571) Replace when this is modeled through side-effects/op traits
-static bool isMemRefDereferencingOp(Operation &op) {
- if (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op) ||
- isa<AffineDmaStartOp>(op) || isa<AffineDmaWaitOp>(op))
- return true;
- return false;
-}
-
// MemRefDependenceGraph is a graph data structure where graph nodes are
// top-level operations in a FuncOp which contain load/store ops, and edges
// are memref dependences between the nodes.
void dump() const { print(llvm::errs()); }
};
+} // end anonymous namespace
+
// Initializes the data dependence graph by walking operations in 'f'.
// Assigns each node in the graph a node id based on program order in 'f'.
// TODO(andydavis) Add support for taking a Block arg to construct the
}
// TODO(mlir-team): improve/complete this when we have target data.
-unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
+static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
auto elementType = memRefType.getElementType();
unsigned sizeInBits;
return true;
}
+namespace {
+
// GreedyFusion greedily fuses loop nests which have a producer/consumer or
// input-reuse relationship on a memref, with the goal of improving locality.
//